diff --git a/fuzzers/fuzzbench_tables/.gitignore b/fuzzers/fuzzbench_tables/.gitignore new file mode 100644 index 0000000000..d3561edaf7 --- /dev/null +++ b/fuzzers/fuzzbench_tables/.gitignore @@ -0,0 +1,2 @@ +libpng-* +fuzzer diff --git a/fuzzers/fuzzbench_tables/Cargo.toml b/fuzzers/fuzzbench_tables/Cargo.toml new file mode 100644 index 0000000000..13d208052b --- /dev/null +++ b/fuzzers/fuzzbench_tables/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "fuzzbench" +version = "0.10.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] +no_link_main = ["libafl_targets/libfuzzer_no_link_main"] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = { version = "4.0.2" } + +[dependencies] +libafl = { path = "../../libafl/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer"] } +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } +clap = { version = "4.0", features = ["default"] } +nix = "0.26" +mimalloc = { version = "*", default-features = false } + +[lib] +name = "fuzzbench" +crate-type = ["staticlib"] diff --git a/fuzzers/fuzzbench_tables/Makefile.toml b/fuzzers/fuzzbench_tables/Makefile.toml new file mode 100644 index 0000000000..1cfd6e3b55 --- /dev/null +++ b/fuzzers/fuzzbench_tables/Makefile.toml @@ -0,0 +1,106 @@ +[env] +PROJECT_DIR = { script = ["pwd"] } +CARGO_TARGET_DIR = { value = "${PROJECT_DIR}/target", condition = { env_not_set = ["CARGO_TARGET_DIR"] } } +FUZZER_NAME="fuzzer" + +[tasks.unsupported] +script_runner="@shell" +script=''' +echo "Cargo-make not integrated yet on this" +''' + +# Compilers +[tasks.cxx] +linux_alias = "cxx_unix" +mac_alias = "cxx_unix" +windows_alias = "unsupported" + +[tasks.cxx_unix] +command = "cargo" +args = ["build" , "--release"] + +[tasks.cc] +linux_alias = 
"cc_unix" +mac_alias = "cc_unix" +windows_alias = "unsupported" + +[tasks.cc_unix] +command = "cargo" +args = ["build" , "--release"] + +# fuzz.o File +[tasks.fuzz_o] +linux_alias = "fuzz_o_unix" +mac_alias = "fuzz_o_unix" +windows_alias = "unsupported" + +[tasks.fuzz_o_unix] +command = "${CARGO_TARGET_DIR}/release/libafl_cc" +args = ["--libafl-no-link", "-O3", "-c", "fuzz.c", "-o", "fuzz.o"] +dependencies = ["cc", "cxx"] + +# Fuzzer +[tasks.fuzzer] +linux_alias = "fuzzer_unix" +mac_alias = "fuzzer_unix" +windows_alias = "unsupported" + +[tasks.fuzzer_unix] +command = "${CARGO_TARGET_DIR}/release/libafl_cxx" +args = ["--libafl", "fuzz.o", "-o", "${FUZZER_NAME}", "-lm", "-lz"] +dependencies = ["cc", "cxx", "fuzz_o"] + +# Run +[tasks.run] +linux_alias = "run_unix" +mac_alias = "run_unix" +windows_alias = "unsupported" + +[tasks.run_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +./${FUZZER_NAME} -o out -i in +''' +dependencies = ["fuzzer"] + + +# Test +[tasks.test] +linux_alias = "test_unix" +mac_alias = "test_unix" +windows_alias = "unsupported" + +[tasks.test_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +# Allow sigterm as exit code +timeout 11s ./${FUZZER_NAME} -o out -i in >fuzz_stdout.log || true +if [ -z "$(grep "objectives: 10" fuzz_stdout.log)" ]; then + echo "Fuzzer does not generate any testcases or any crashes" + exit 1 +else + echo "Fuzzer is working" +fi +rm -rf out || true +rm -rf in || true +''' +dependencies = ["fuzzer"] + +# Clean +[tasks.clean] +linux_alias = "clean_unix" +mac_alias = "clean_unix" +windows_alias = "unsupported" + +[tasks.clean_unix] +script_runner="@shell" +script=''' +rm ./${FUZZER_NAME} || true +rm fuzz.o || true +''' diff --git a/fuzzers/fuzzbench_tables/README.md b/fuzzers/fuzzbench_tables/README.md new file mode 100644 index 0000000000..df34f5e090 --- /dev/null +++ 
b/fuzzers/fuzzbench_tables/README.md @@ -0,0 +1,17 @@ +# Fuzzbench Harness + +This folder contains an example fuzzer tailored for fuzzbench. +It uses the best possible setting, with the exception of a SimpleRestartingEventManager instead of an LlmpEventManager - since fuzzbench is single threaded. +Real fuzz campaigns should consider using multithreaded LlmpEventManager, see the other examples. + +## Build + +To build this example, run `cargo build --release`. +This will build the fuzzer compilers (`libafl_cc` and `libafl_cpp`) with `src/lib.rs` as fuzzer. +The fuzzer uses the libfuzzer compatibility layer and the SanitizerCoverage runtime functions for coverage feedback. + +These can then be used to build libfuzzer harnesses in the software project of your choice. +Finally, just run the resulting binary with `out_dir`, `in_dir`. + +In any real-world scenario, you should use `taskset` to pin each client to an empty CPU core, the lib does not pick an empty core automatically (yet). + diff --git a/fuzzers/fuzzbench_tables/fuzz.c b/fuzzers/fuzzbench_tables/fuzz.c new file mode 100644 index 0000000000..9175059e59 --- /dev/null +++ b/fuzzers/fuzzbench_tables/fuzz.c @@ -0,0 +1,15 @@ +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 8 && *(uint32_t *)Data == 0xaabbccdd) { abort(); } + return 0; +} + +/* +int main() { + + char buf [10] = {0}; + LLVMFuzzerTestOneInput(buf, 10); + +}*/ diff --git a/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs b/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..9f3703f49a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs @@ -0,0 +1,46 @@ +use std::env; + +use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses, ToolWrapper}; + +pub fn main() { + let mut args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match 
wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ wrapper was called. Expected {dir:?} to end with c or cxx"), + }; + + dir.pop(); + + // Must be always present, even without --libafl + args.push("-fsanitize-coverage=trace-pc-guard,trace-cmp".into()); + + let mut cc = ClangWrapper::new(); + + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + cc.add_pass(LLVMPasses::AutoTokens); + + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. + .silence(true) + // add arguments only if --libafl or --libafl-no-link are present + .need_libafl_arg(true) + .parse_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "fuzzbench") + .add_pass(LLVMPasses::CmpLogRtn) + .add_pass(LLVMPasses::Tables) + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs b/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..dabd22971a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main(); +} diff --git a/fuzzers/fuzzbench_tables/src/lib.rs b/fuzzers/fuzzbench_tables/src/lib.rs new file mode 100644 index 0000000000..d18b3e8f4a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/lib.rs @@ -0,0 +1,405 @@ +//! A singlethreaded libfuzzer-like fuzzer that can auto-restart. 
+use mimalloc::MiMalloc; +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +use core::{cell::RefCell, time::Duration}; +#[cfg(unix)] +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::{ + env, + fs::{self, File, OpenOptions}, + io::{self, Read, Write}, + path::PathBuf, + process, +}; + +use clap::{Arg, Command}; +use libafl::{ + bolts::{ + current_nanos, current_time, + os::dup2, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + AsSlice, + }, + corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus}, + events::SimpleRestartingEventManager, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::{BytesInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, + StdMOptMutator, StdScheduledMutator, Tokens, + }, + observers::{ConstMapObserver, HitcountsMapObserver, TimeObserver}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, + }, + stages::{ + calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage, + TracingStage, + }, + state::{HasCorpus, HasMetadata, StdState}, + Error, +}; +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +use libafl_targets::autotokens; +use libafl_targets::{ + libfuzzer_initialize, libfuzzer_test_one_input, std_edges_map_observer, CmpLogObserver, + TABLES_MAP, TABLES_MAP_SIZE, +}; +#[cfg(unix)] +use nix::{self, unistd::dup}; + +/// The fuzzer main (as `no_mangle` C function) +#[no_mangle] +pub fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + let res = match Command::new(env!("CARGO_PKG_NAME")) + .version(env!("CARGO_PKG_VERSION")) + .author("AFLplusplus team") + .about("LibAFL-based fuzzer for 
Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')"), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')"), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing"), + ) + .arg( + Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg(Arg::new("remaining")) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + if let Some(filenames) = res.get_many::("remaining") { + let filenames: Vec<&str> = filenames.map(String::as_str).collect(); + if !filenames.is_empty() { + run_testcases(&filenames); + return; + } + } + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. 
+ let mut out_dir = PathBuf::from( + res.get_one::("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.get_one::("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.get_one::("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.get_one::("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.get_one::("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + fuzz(out_dir, crashes, &in_dir, tokens, &logfile, timeout) + .expect("An error occurred while fuzzing"); +} + +fn run_testcases(filenames: &[&str]) { + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. 
+ let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + println!( + "You are not fuzzing, just executing {} testcases", + filenames.len() + ); + for fname in filenames { + println!("Executing {fname}"); + + let mut file = File::open(fname).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + libfuzzer_test_one_input(&buffer); + } +} + +/// The actual fuzzer +#[allow(clippy::too_many_lines)] +fn fuzz( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: &PathBuf, + tokenfile: Option, + logfile: &PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{s}").unwrap(); + #[cfg(windows)] + println!("{s}"); + writeln!(log.borrow_mut(), "{:?} {s}", current_time()).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. 
+ Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {err}"); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }); + + let tables_observer = + ConstMapObserver::::new("tables", unsafe { &mut TABLES_MAP }); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let cmplog_observer = CmpLogObserver::new("cmplog", true); + + let map_feedback = MaxMapFeedback::tracking(&edges_observer, true, false); + + let tables_feedback = MaxMapFeedback::new(&tables_observer); + + let calibration = CalibrationStage::new(&map_feedback); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let mut feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + map_feedback, + tables_feedback, + // Time feedback, this one does not need a feedback state + TimeFeedback::with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let mut objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryOnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // The feedbacks can report the data that should persist in the State. 
+ &mut feedback, + // Same for objective feedbacks + &mut objective, + ) + .unwrap() + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new( + &mut state, + havoc_mutations().merge(tokens_mutations()), + 7, + 5, + )?; + + let power = StdPowerMutationalStage::new(mutator); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new(StdWeightedScheduler::with_schedule( + &mut state, + &edges_observer, + Some(PowerSchedule::FAST), + )); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, tables_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + timeout, + ); + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(TimeoutExecutor::new( + InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + // Give it more time! + timeout * 10, + )); + + // The order of the stages matter! 
+ let mut stages = tuple_list!(calibration, tracing, i2s, power); + + // Read tokens + if state.metadata_map().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks.add_from_file(tokenfile)?; + } + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + { + toks += autotokens()?; + } + + if !toks.is_empty() { + state.add_metadata(toks); + } + } + + // In case the corpus is empty (on first run), reset + if state.must_load_initial_inputs() { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target ouput (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + dup2(null_fd, io::stderr().as_raw_fd())?; + } + // reopen file to make sure we're at the end + log.replace(OpenOptions::new().append(true).create(true).open(logfile)?); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} diff --git a/fuzzers/fuzzbench_tables/stub_rt.c b/fuzzers/fuzzbench_tables/stub_rt.c new file mode 100644 index 0000000000..3e9c1a5c22 --- /dev/null +++ b/fuzzers/fuzzbench_tables/stub_rt.c @@ -0,0 +1,29 @@ +#include + +__attribute__ ((weak)) void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { +} + +__attribute__ ((weak)) void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { +} + +__attribute__ ((weak)) void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { +} + +__attribute__ ((weak)) void __cmplog_rtn_gcc_stdstring_cstring(uint8_t *stdstring, uint8_t *cstring) { +} + +__attribute__ ((weak)) void __cmplog_rtn_gcc_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { +} + +__attribute__ ((weak)) void __cmplog_rtn_llvm_stdstring_cstring(uint8_t 
*stdstring, uint8_t *cstring) { +} + +__attribute__ ((weak)) void __cmplog_rtn_llvm_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { +} + +__attribute__ ((weak)) void __libafl_tables_transition(uint32_t arg1, uint32_t arg2) { +} + +extern void libafl_main(void); + +int main(int argc, char **argv) { libafl_main(); return 0; } diff --git a/libafl/src/monitors/multi.rs b/libafl/src/monitors/multi.rs index 30837b6363..d6815c8e88 100644 --- a/libafl/src/monitors/multi.rs +++ b/libafl/src/monitors/multi.rs @@ -3,14 +3,17 @@ #[cfg(feature = "introspection")] use alloc::string::ToString; use alloc::{string::String, vec::Vec}; -use core::{fmt::Write, time::Duration}; +use core::{ + fmt::{Debug, Write}, + time::Duration, +}; use libafl_bolts::{current_time, format_duration_hms, ClientId}; use crate::monitors::{ClientStats, Monitor}; /// Tracking monitor during fuzzing and display both per-client and cumulative info. -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct MultiMonitor where F: FnMut(String), @@ -20,6 +23,19 @@ where client_stats: Vec, } +/// Manual `Debug` impl: prints `start_time` and `client_stats`; `finish_non_exhaustive` marks the remaining fields as omitted. +impl<F> Debug for MultiMonitor<F> +where + F: FnMut(String), +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("MultiMonitor") + .field("start_time", &self.start_time) + .field("client_stats", &self.client_stats) + .finish_non_exhaustive() + } +} + impl Monitor for MultiMonitor where F: FnMut(String), diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index c1deeb5fd9..ca7634fe63 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -397,6 +397,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; "afl-coverage-pass.cc", "autotokens-pass.cc", "coverage-accounting-pass.cc", + "tables-pass.cc", ] { build_pass( bindir_path, diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index 0f07a4a48b..fab54b98c0 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -39,6 +39,8 @@ pub enum LLVMPasses { AutoTokens, /// The Coverage Accouting (BB metric) pass 
CoverageAccounting, + /// Coverage from parsing tables + Tables, /// The dump cfg pass DumpCfg, } @@ -57,6 +59,9 @@ impl LLVMPasses { } LLVMPasses::CoverageAccounting => PathBuf::from(env!("OUT_DIR")) .join(format!("coverage-accounting-pass.{}", dll_extension())), + LLVMPasses::Tables => { + PathBuf::from(env!("OUT_DIR")).join(format!("tables-pass.{}", dll_extension())) + } LLVMPasses::DumpCfg => { PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension())) } diff --git a/libafl_cc/src/no-link-rt.c b/libafl_cc/src/no-link-rt.c index 08c069bd45..80cc12bf42 100644 --- a/libafl_cc/src/no-link-rt.c +++ b/libafl_cc/src/no-link-rt.c @@ -11,6 +11,11 @@ void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, (void)arg2; } +void __libafl_tables_transition(uint32_t arg1, uint32_t arg2) { + (void)arg1; + (void)arg2; +} + void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { (void)ptr1; (void)ptr2; diff --git a/libafl_cc/src/tables-pass.cc b/libafl_cc/src/tables-pass.cc new file mode 100644 index 0000000000..2310d98569 --- /dev/null +++ b/libafl_cc/src/tables-pass.cc @@ -0,0 +1,233 @@ +/* + LibAFL - Parsing tables coverage LLVM pass + -------------------------------------------------- + + Written by Andrea Fioraldi + + Copyright 2023 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include "common-llvm.h" + +#include + +#include +#include +#include +#include + +#include "llvm/Support/CommandLine.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +// Without this, Can't build with llvm-14 & old PM +#if LLVM_VERSION_MAJOR >= 14 && !defined(USE_NEW_PM) + #include "llvm/Pass.h" +#endif + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/DebugInfo.h" + #include "llvm/IR/CFG.h" +#else + #include "llvm/DebugInfo.h" + #include "llvm/Support/CFG.h" +#endif + +using namespace llvm; + +static cl::opt Debug("debug", cl::desc("Debug prints"), cl::init(false), + cl::NotHidden); + +namespace { + +Value *recurseCast(Value *V) { + CastInst *CI; + if ((CI = dyn_cast(V))) { return recurseCast(CI->getOperand(0)); } + return V; +} + +#ifdef USE_NEW_PM +class TamingParsingTables : public PassInfoMixin { + public: + TamingParsingTables() { +#else +class TamingParsingTables : public ModulePass { + public: + static char ID; + TamingParsingTables() : ModulePass(ID) { +#endif + // initInstrumentList(); + } + +#ifdef USE_NEW_PM + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +#else + bool runOnModule(Module &M) override; +#endif + + protected: + uint32_t function_minimum_size = 1; +}; + +} // namespace + +#ifdef USE_NEW_PM +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "TamingParsingTables", "v0.1", + /* lambda to insert our pass into the pass pipeline. 
*/ + [](PassBuilder &PB) { + #if 1 + #if LLVM_VERSION_MAJOR <= 13 + using OptimizationLevel = typename PassBuilder::OptimizationLevel; + #endif + PB.registerOptimizerLastEPCallback( + [](ModulePassManager &MPM, OptimizationLevel OL) { + MPM.addPass(TamingParsingTables()); + }); + /* TODO LTO registration */ + #else + using PipelineElement = typename PassBuilder::PipelineElement; + PB.registerPipelineParsingCallback([](StringRef Name, + ModulePassManager &MPM, + ArrayRef) { + if (Name == "TamingParsingTables") { + MPM.addPass(TamingParsingTables()); + return true; + } else { + return false; + } + }); + #endif + }}; +} +#else + +char TamingParsingTables::ID = 0; +#endif + +#ifdef USE_NEW_PM +PreservedAnalyses TamingParsingTables::run(Module &M, + ModuleAnalysisManager &MAM) { +#else +bool TamingParsingTables::runOnModule(Module &M) { +#endif + + LLVMContext &C = M.getContext(); + + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + Type *VoidTy = Type::getVoidTy(C); + + FunctionCallee LogFunc = M.getOrInsertFunction("__libafl_tables_transition", + VoidTy, Int32Ty, Int32Ty); + +#ifdef USE_NEW_PM + auto PA = PreservedAnalyses::all(); +#endif + + /* Instrument all the things! 
*/ + + for (auto &F : M) { + int has_calls = 0; + + // if (!isInInstrumentList(&F)) { continue; } + + if (F.size() < function_minimum_size) { continue; } + + std::unordered_set loads; + std::unordered_set geps; + + for (auto &BB : F) { + for (auto &I : BB) { + GetElementPtrInst *GEP; + StoreInst *ST; + LoadInst *LI; + if ((LI = dyn_cast(&I))) { + loads.insert(LI); + } else if ((GEP = dyn_cast(&I))) { + if (!GEP->hasIndices() || GEP->hasAllConstantIndices()) continue; + // TODO handle multiple idxs + Value *IDX = *GEP->idx_begin(); + IDX = recurseCast(IDX); + + if ((LI = dyn_cast(IDX)) && loads.find(LI) != loads.end()) { + geps.insert(GEP); + } + } else if ((ST = dyn_cast(&I))) { + Value *PTR = ST->getPointerOperand(); // must be in a prev load + Value *VAL = recurseCast(ST->getValueOperand()); + LoadInst *GL = nullptr; + Value *V = nullptr; + + if ((GL = dyn_cast(VAL))) { + V = GL->getPointerOperand(); + if (V == nullptr || geps.find(V) == geps.end()) { continue; } + } else { + continue; + } + + // the value comes from a load in which the ptr is obtained with a gep + GEP = (GetElementPtrInst *)V; + Value *IDX = *GEP->idx_begin(); + IDX = recurseCast(IDX); + LI = dyn_cast(IDX); + + if (LI == nullptr) continue; + if (recurseCast(LI->getPointerOperand()) != + recurseCast(ST->getPointerOperand())) + continue; + + if (Debug) { + std::string location = std::string("UNKNOWN"); + if (DILocation *Loc = GEP->getDebugLoc().get()) { + location = std::string(Loc->getFilename().data()) + + std::string(":") + std::to_string(Loc->getLine()); + } + + errs() << "FOUND " << location << "\n\t" << *LI << "\n\t" << *GEP + << "\n\t" << *ST << "\n\n"; + } + + IRBuilder<> IRB(ST); + Value *A1 = IRB.CreateIntCast(LI, Int32Ty, false); + Value *A2 = IRB.CreateIntCast(ST->getValueOperand(), Int32Ty, false); + IRB.CreateCall(LogFunc, {A1, A2}); + } + } + } + } + +#ifdef USE_NEW_PM + return PA; +#else + return true; +#endif +} + +#ifndef USE_NEW_PM +static void registerTablesPass(const 
PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new TamingParsingTables()); +} + +static RegisterStandardPasses RegisterTablesPass( + PassManagerBuilder::EP_OptimizerLast, registerTablesPass); + +static RegisterStandardPasses RegisterTablesPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerTablesPass); +#endif diff --git a/libafl_targets/build.rs b/libafl_targets/build.rs index b7583ad140..a527dc6100 100644 --- a/libafl_targets/build.rs +++ b/libafl_targets/build.rs @@ -27,6 +27,9 @@ fn main() { let acc_map_size: usize = option_env!("LIBAFL_ACCOUNTING_MAP_SIZE") .map_or(Ok(65536), str::parse) .expect("Could not parse LIBAFL_ACCOUNTING_MAP_SIZE"); + let tables_map_size: usize = option_env!("LIBAFL_TABLES_MAP_SIZE") + .map_or(Ok(16384), str::parse) + .expect("Could not parse LIBAFL_TABLES_MAP_SIZE"); write!( constants_file, @@ -42,6 +45,8 @@ fn main() { pub const CMPLOG_MAP_H: usize = {cmplog_map_h}; /// The size of the accounting maps pub const ACCOUNTING_MAP_SIZE: usize = {acc_map_size}; + /// The size of the tables map + pub const TABLES_MAP_SIZE: usize = {tables_map_size}; " ) .expect("Could not write file"); @@ -51,6 +56,7 @@ fn main() { println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_W"); println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_H"); println!("cargo:rerun-if-env-changed=LIBAFL_ACCOUNTING_MAP_SIZE"); + println!("cargo:rerun-if-env-changed=LIBAFL_TABLES_MAP_SIZE"); //std::env::set_var("CC", "clang"); //std::env::set_var("CXX", "clang++"); diff --git a/libafl_targets/src/lib.rs b/libafl_targets/src/lib.rs index 5c619f0f94..854ef3efef 100644 --- a/libafl_targets/src/lib.rs +++ b/libafl_targets/src/lib.rs @@ -98,6 +98,9 @@ pub use value_profile::*; pub mod cmplog; pub use cmplog::*; +pub mod tables; +pub use tables::*; + #[cfg(feature = "std")] pub mod drcov; diff --git a/libafl_targets/src/tables.rs b/libafl_targets/src/tables.rs new file mode 100644 index 0000000000..9361ab9892 --- /dev/null +++ 
b/libafl_targets/src/tables.rs @@ -0,0 +1,32 @@ +//! Tables state transition pass runtime for `LibAFL`. + +use crate::TABLES_MAP_SIZE; + +/// Map with tables transitions +pub static mut TABLES_MAP: [u8; TABLES_MAP_SIZE] = [0; TABLES_MAP_SIZE]; + +fn merge_u32(a: u32, b: u32) -> u64 { + (u64::from(a) << 32) + u64::from(b) +} + +/// From +fn splitmix64(target: u64, seed: u64) -> u64 { + let sp_step = 0x9E3779B97F4A7C15_u64; + let mut out = target; + out += seed * sp_step; + out ^= out >> 30; + out *= 0xBF58476D1CE4E5B9_u64; + out ^= out >> 27; + out *= 0x94D049BB133111EB_u64; + out ^= out >> 31; + out +} + +#[no_mangle] +/// Log tables transitions and insert them in the map +pub extern "C" fn __libafl_tables_transition(cur: u32, next: u32) { + let hash = splitmix64(merge_u32(cur, next), 52) as usize % TABLES_MAP_SIZE; + unsafe { + TABLES_MAP[hash] = 1; + } +}