From 4505424b2393e8e986e830cc18dd9f0571f3a303 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 12 Jul 2023 13:48:25 +0200 Subject: [PATCH 1/5] Parsing tables access based coverage --- libafl_cc/build.rs | 1 + libafl_cc/src/clang.rs | 5 + libafl_cc/src/no-link-rt.c | 5 + libafl_cc/src/tables-pass.cc | 229 +++++++++++++++++++++++++++++++++++ libafl_targets/build.rs | 6 + libafl_targets/src/lib.rs | 3 + libafl_targets/src/tables.rs | 32 +++++ 7 files changed, 281 insertions(+) create mode 100644 libafl_cc/src/tables-pass.cc create mode 100644 libafl_targets/src/tables.rs diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index c1deeb5fd9..ca7634fe63 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -397,6 +397,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; "afl-coverage-pass.cc", "autotokens-pass.cc", "coverage-accounting-pass.cc", + "tables-pass.cc", ] { build_pass( bindir_path, diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index 0f07a4a48b..a7c8509546 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -39,6 +39,8 @@ pub enum LLVMPasses { AutoTokens, /// The Coverage Accouting (BB metric) pass CoverageAccounting, + /// Coverage from parsing tables + Tables, /// The dump cfg pass DumpCfg, } @@ -57,6 +59,9 @@ impl LLVMPasses { } LLVMPasses::CoverageAccounting => PathBuf::from(env!("OUT_DIR")) .join(format!("coverage-accounting-pass.{}", dll_extension())), + LLVMPasses::Tables => { + PathBuf::from(env!("OUT_DIR")).join(format!("tabes-pass.{}", dll_extension())) + } LLVMPasses::DumpCfg => { PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension())) } diff --git a/libafl_cc/src/no-link-rt.c b/libafl_cc/src/no-link-rt.c index 08c069bd45..80cc12bf42 100644 --- a/libafl_cc/src/no-link-rt.c +++ b/libafl_cc/src/no-link-rt.c @@ -11,6 +11,11 @@ void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, (void)arg2; } +void __libafl_tables_transition(uint32_t arg1, uint32_t arg2) { + 
(void)arg1; + (void)arg2; +} + void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { (void)ptr1; (void)ptr2; diff --git a/libafl_cc/src/tables-pass.cc b/libafl_cc/src/tables-pass.cc new file mode 100644 index 0000000000..4949f34806 --- /dev/null +++ b/libafl_cc/src/tables-pass.cc @@ -0,0 +1,229 @@ +/* + LibAFL - Coverage accounting LLVM pass + -------------------------------------------------- + + Written by Andrea Fioraldi + + Copyright 2023 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include "common-llvm.h" + +#include + +#include +#include +#include +#include + +#include "llvm/Support/CommandLine.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +// Without this, Can't build with llvm-14 & old PM +#if LLVM_VERSION_MAJOR >= 14 && !defined(USE_NEW_PM) + #include "llvm/Pass.h" +#endif + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/DebugInfo.h" + #include "llvm/IR/CFG.h" +#else + #include "llvm/DebugInfo.h" + #include "llvm/Support/CFG.h" +#endif + +using namespace llvm; + +static cl::opt Debug("debug", cl::desc("Debug prints"), cl::init(false), + cl::NotHidden); + +namespace { + +Value *recurseCast(Value *V) { + CastInst *CI; + if ((CI = dyn_cast(V))) { return recurseCast(CI->getOperand(0)); } + return V; +} + +#ifdef USE_NEW_PM +class TamingParsingTables : public PassInfoMixin { + public: + TamingParsingTables() { +#else +class TamingParsingTables : public ModulePass { + public: + static char ID; + TamingParsingTables() : ModulePass(ID) { +#endif + // initInstrumentList(); + } + +#ifdef USE_NEW_PM + PreservedAnalyses run(Module &M, ModuleAnalysisManager 
&MAM); +#else + bool runOnModule(Module &M) override; +#endif + + protected: + uint32_t function_minimum_size = 1; +}; + +} // namespace + +#ifdef USE_NEW_PM +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "TamingParsingTables", "v0.1", + /* lambda to insert our pass into the pass pipeline. */ + [](PassBuilder &PB) { + #if 1 + #if LLVM_VERSION_MAJOR <= 13 + using OptimizationLevel = typename PassBuilder::OptimizationLevel; + #endif + PB.registerOptimizerLastEPCallback( + [](ModulePassManager &MPM, OptimizationLevel OL) { + MPM.addPass(TamingParsingTables()); + }); + /* TODO LTO registration */ + #else + using PipelineElement = typename PassBuilder::PipelineElement; + PB.registerPipelineParsingCallback([](StringRef Name, + ModulePassManager &MPM, + ArrayRef) { + if (Name == "TamingParsingTables") { + MPM.addPass(TamingParsingTables()); + return true; + } else { + return false; + } + }); + #endif + }}; +} +#else + +char TamingParsingTables::ID = 0; +#endif + +#ifdef USE_NEW_PM +PreservedAnalyses TamingParsingTables::run(Module &M, ModuleAnalysisManager &MAM) { +#else +bool TamingParsingTables::runOnModule(Module &M) { +#endif + + LLVMContext &C = M.getContext(); + + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + Type *VoidTy = Type::getVoidTy(C); + + FunctionCallee LogFunc = M.getOrInsertFunction("__libafl_tables_transition", VoidTy, Int32Ty, Int32Ty); + +#ifdef USE_NEW_PM + auto PA = PreservedAnalyses::all(); +#endif + + /* Instrument all the things! 
*/ + + for (auto &F : M) { + int has_calls = 0; + + // if (!isInInstrumentList(&F)) { continue; } + + if (F.size() < function_minimum_size) { continue; } + + std::unordered_set loads; + std::unordered_set geps; + + for (auto &BB : F) { + for (auto &I : BB) { + GetElementPtrInst *GEP; + StoreInst *ST; + LoadInst *LI; + if ((LI = dyn_cast(&I))) { + loads.insert(LI); + } else if ((GEP = dyn_cast(&I))) { + if (!GEP->hasIndices() || GEP->hasAllConstantIndices()) continue; + // TODO handle multiple idxs + Value *IDX = *GEP->idx_begin(); + IDX = recurseCast(IDX); + + if ((LI = dyn_cast(IDX)) && loads.contains(LI)) { + geps.insert(GEP); + } + } else if ((ST = dyn_cast(&I))) { + Value *PTR = ST->getPointerOperand(); // must be in a prev load + Value *VAL = recurseCast(ST->getValueOperand()); + LoadInst *GL = nullptr; + Value *V = nullptr; + + if ((GL = dyn_cast(VAL))) { + V = GL->getPointerOperand(); + if (V == nullptr || !geps.contains(V)) { continue; } + } else { + continue; + } + + // the value comes from a load in which the ptr is obtained with a gep + GEP = (GetElementPtrInst *)V; + Value *IDX = *GEP->idx_begin(); + IDX = recurseCast(IDX); + LI = dyn_cast(IDX); + + if (LI == nullptr) continue; + if (recurseCast(LI->getPointerOperand()) != + recurseCast(ST->getPointerOperand())) + continue; + + std::string location = std::string("UNKNOWN"); + if (DILocation *Loc = GEP->getDebugLoc().get()) { + location = std::string(Loc->getFilename().data()) + + std::string(":") + std::to_string(Loc->getLine()); + } + + errs() << "FOUND " << location << "\n\t" << *LI << "\n\t" << *GEP + << "\n\t" << *ST << "\n\n"; + + IRBuilder<> IRB(ST); + Value *A1 = IRB.CreateIntCast(LI, Int32Ty, false); + Value *A2 = IRB.CreateIntCast(ST->getValueOperand(), Int32Ty, false); + IRB.CreateCall(LogFunc, {A1, A2}); + } + } + } + } + +#ifdef USE_NEW_PM + return PA; +#else + return true; +#endif +} + +#ifndef USE_NEW_PM +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase 
&PM) { + PM.add(new TamingParsingTables()); +} + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_OptimizerLast, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); +#endif diff --git a/libafl_targets/build.rs b/libafl_targets/build.rs index b7583ad140..a527dc6100 100644 --- a/libafl_targets/build.rs +++ b/libafl_targets/build.rs @@ -27,6 +27,9 @@ fn main() { let acc_map_size: usize = option_env!("LIBAFL_ACCOUNTING_MAP_SIZE") .map_or(Ok(65536), str::parse) .expect("Could not parse LIBAFL_ACCOUNTING_MAP_SIZE"); + let tables_map_size: usize = option_env!("LIBAFL_TABLES_MAP_SIZE") + .map_or(Ok(16384), str::parse) + .expect("Could not parse LIBAFL_TABLES_MAP_SIZE"); write!( constants_file, @@ -42,6 +45,8 @@ fn main() { pub const CMPLOG_MAP_H: usize = {cmplog_map_h}; /// The size of the accounting maps pub const ACCOUNTING_MAP_SIZE: usize = {acc_map_size}; + /// The size of the tables map + pub const TABLES_MAP_SIZE: usize = {tables_map_size}; " ) .expect("Could not write file"); @@ -51,6 +56,7 @@ fn main() { println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_W"); println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_H"); println!("cargo:rerun-if-env-changed=LIBAFL_ACCOUNTING_MAP_SIZE"); + println!("cargo:rerun-if-env-changed=LIBAFL_TABLES_MAP_SIZE"); //std::env::set_var("CC", "clang"); //std::env::set_var("CXX", "clang++"); diff --git a/libafl_targets/src/lib.rs b/libafl_targets/src/lib.rs index 5c619f0f94..854ef3efef 100644 --- a/libafl_targets/src/lib.rs +++ b/libafl_targets/src/lib.rs @@ -98,6 +98,9 @@ pub use value_profile::*; pub mod cmplog; pub use cmplog::*; +pub mod tables; +pub use tables::*; + #[cfg(feature = "std")] pub mod drcov; diff --git a/libafl_targets/src/tables.rs b/libafl_targets/src/tables.rs new file mode 100644 index 0000000000..b0a6f5d19d --- /dev/null +++ b/libafl_targets/src/tables.rs @@ -0,0 +1,32 @@ +//! 
Tables state transition pass runtime for `LibAFL`. + +use crate::TABLES_MAP_SIZE; + +/// Map with tables transitions +pub static mut TABLES_MAP: [u8; TABLES_MAP_SIZE] = [0; TABLES_MAP_SIZE]; + +fn merge_u32(a: u32, b: u32) -> u64 { + (u64::from(a) << 32) | u64::from(b) +} + +/// From https://sair.synerise.com/efficient-integer-pairs-hashing/ +fn splitmix64(target: u64, seed: u64) -> u64 { + let sp_step = 0x9E3779B97F4A7C15_u64; + let mut out = target; + out = out.wrapping_add(seed.wrapping_mul(sp_step)); + out ^= out >> 30; + out = out.wrapping_mul(0xBF58476D1CE4E5B9_u64); + out ^= out >> 27; + out = out.wrapping_mul(0x94D049BB133111EB_u64); + out ^= out >> 31; + out +} + +#[no_mangle] +/// Log tables transitions and insert them in the map +pub extern "C" fn __libafl_tables_transition(cur: u32, next: u32) { + let hash = splitmix64(merge_u32(cur, next), 52) as usize % TABLES_MAP_SIZE; + unsafe { + TABLES_MAP[hash] += 1; + } +} From 8169b23e727296a50d25663cd6dc18f5833da514 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 12 Jul 2023 14:55:41 +0200 Subject: [PATCH 2/5] fuzzbench_tables example fuzzer --- fuzzers/fuzzbench_tables/.gitignore | 2 + fuzzers/fuzzbench_tables/Cargo.toml | 33 ++ fuzzers/fuzzbench_tables/Makefile.toml | 106 +++++ fuzzers/fuzzbench_tables/README.md | 17 + fuzzers/fuzzbench_tables/fuzz.c | 15 + fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs | 46 ++ .../fuzzbench_tables/src/bin/libafl_cxx.rs | 5 + fuzzers/fuzzbench_tables/src/lib.rs | 405 ++++++++++++++++++ fuzzers/fuzzbench_tables/stub_rt.c | 29 ++ libafl_cc/src/clang.rs | 2 +- libafl_cc/src/tables-pass.cc | 16 +- libafl_targets/src/tables.rs | 2 +- 12 files changed, 668 insertions(+), 10 deletions(-) create mode 100644 fuzzers/fuzzbench_tables/.gitignore create mode 100644 fuzzers/fuzzbench_tables/Cargo.toml create mode 100644 fuzzers/fuzzbench_tables/Makefile.toml create mode 100644 fuzzers/fuzzbench_tables/README.md create mode 100644 fuzzers/fuzzbench_tables/fuzz.c create mode 100644 fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs create mode 
100644 fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs create mode 100644 fuzzers/fuzzbench_tables/src/lib.rs create mode 100644 fuzzers/fuzzbench_tables/stub_rt.c diff --git a/fuzzers/fuzzbench_tables/.gitignore b/fuzzers/fuzzbench_tables/.gitignore new file mode 100644 index 0000000000..d3561edaf7 --- /dev/null +++ b/fuzzers/fuzzbench_tables/.gitignore @@ -0,0 +1,2 @@ +libpng-* +fuzzer diff --git a/fuzzers/fuzzbench_tables/Cargo.toml b/fuzzers/fuzzbench_tables/Cargo.toml new file mode 100644 index 0000000000..13d208052b --- /dev/null +++ b/fuzzers/fuzzbench_tables/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "fuzzbench" +version = "0.10.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] +no_link_main = ["libafl_targets/libfuzzer_no_link_main"] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = { version = "4.0.2" } + +[dependencies] +libafl = { path = "../../libafl/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer"] } +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } +clap = { version = "4.0", features = ["default"] } +nix = "0.26" +mimalloc = { version = "*", default-features = false } + +[lib] +name = "fuzzbench" +crate-type = ["staticlib"] diff --git a/fuzzers/fuzzbench_tables/Makefile.toml b/fuzzers/fuzzbench_tables/Makefile.toml new file mode 100644 index 0000000000..1cfd6e3b55 --- /dev/null +++ b/fuzzers/fuzzbench_tables/Makefile.toml @@ -0,0 +1,106 @@ +[env] +PROJECT_DIR = { script = ["pwd"] } +CARGO_TARGET_DIR = { value = "${PROJECT_DIR}/target", condition = { env_not_set = ["CARGO_TARGET_DIR"] } } +FUZZER_NAME="fuzzer" + +[tasks.unsupported] +script_runner="@shell" +script=''' +echo "Cargo-make not integrated yet on this" +''' + +# Compilers +[tasks.cxx] +linux_alias = 
"cxx_unix" +mac_alias = "cxx_unix" +windows_alias = "unsupported" + +[tasks.cxx_unix] +command = "cargo" +args = ["build" , "--release"] + +[tasks.cc] +linux_alias = "cc_unix" +mac_alias = "cc_unix" +windows_alias = "unsupported" + +[tasks.cc_unix] +command = "cargo" +args = ["build" , "--release"] + +# fuzz.o File +[tasks.fuzz_o] +linux_alias = "fuzz_o_unix" +mac_alias = "fuzz_o_unix" +windows_alias = "unsupported" + +[tasks.fuzz_o_unix] +command = "${CARGO_TARGET_DIR}/release/libafl_cc" +args = ["--libafl-no-link", "-O3", "-c", "fuzz.c", "-o", "fuzz.o"] +dependencies = ["cc", "cxx"] + +# Fuzzer +[tasks.fuzzer] +linux_alias = "fuzzer_unix" +mac_alias = "fuzzer_unix" +windows_alias = "unsupported" + +[tasks.fuzzer_unix] +command = "${CARGO_TARGET_DIR}/release/libafl_cxx" +args = ["--libafl", "fuzz.o", "-o", "${FUZZER_NAME}", "-lm", "-lz"] +dependencies = ["cc", "cxx", "fuzz_o"] + +# Run +[tasks.run] +linux_alias = "run_unix" +mac_alias = "run_unix" +windows_alias = "unsupported" + +[tasks.run_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +./${FUZZER_NAME} -o out -i in +''' +dependencies = ["fuzzer"] + + +# Test +[tasks.test] +linux_alias = "test_unix" +mac_alias = "test_unix" +windows_alias = "unsupported" + +[tasks.test_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +# Allow sigterm as exit code +timeout 11s ./${FUZZER_NAME} -o out -i in >fuzz_stdout.log || true +if [ -z "$(grep "objectives: 10" fuzz_stdout.log)" ]; then + echo "Fuzzer does not generate any testcases or any crashes" + exit 1 +else + echo "Fuzzer is working" +fi +rm -rf out || true +rm -rf in || true +''' +dependencies = ["fuzzer"] + +# Clean +[tasks.clean] +linux_alias = "clean_unix" +mac_alias = "clean_unix" +windows_alias = "unsupported" + +[tasks.clean_unix] +script_runner="@shell" +script=''' +rm ./${FUZZER_NAME} || true +rm fuzz.o || true +''' diff --git 
a/fuzzers/fuzzbench_tables/README.md b/fuzzers/fuzzbench_tables/README.md new file mode 100644 index 0000000000..df34f5e090 --- /dev/null +++ b/fuzzers/fuzzbench_tables/README.md @@ -0,0 +1,17 @@ +# Fuzzbench Harness + +This folder contains an example fuzzer tailored for fuzzbench. +It uses the best possible setting, with the exception of a SimpleRestartingEventManager instead of an LlmpEventManager - since fuzzbench is single threaded. +Real fuzz campaigns should consider using multithreaded LlmpEventManager, see the other examples. + +## Build + +To build this example, run `cargo build --release`. +This will build the fuzzer compilers (`libafl_cc` and `libafl_cxx`) with `src/lib.rs` as fuzzer. +The fuzzer uses the libfuzzer compatibility layer and the SanitizerCoverage runtime functions for coverage feedback. + +These can then be used to build libfuzzer harnesses in the software project of your choice. +Finally, just run the resulting binary with `-o out_dir -i in_dir`. + +In any real-world scenario, you should use `taskset` to pin each client to an empty CPU core, the lib does not pick an empty core automatically (yet). 
+ diff --git a/fuzzers/fuzzbench_tables/fuzz.c b/fuzzers/fuzzbench_tables/fuzz.c new file mode 100644 index 0000000000..9175059e59 --- /dev/null +++ b/fuzzers/fuzzbench_tables/fuzz.c @@ -0,0 +1,15 @@ +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 8 && *(uint32_t *)Data == 0xaabbccdd) { abort(); } + return 0; +} + +/* +int main() { + + char buf [10] = {0}; + LLVMFuzzerTestOneInput(buf, 10); + +}*/ diff --git a/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs b/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..9f3703f49a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/bin/libafl_cc.rs @@ -0,0 +1,46 @@ +use std::env; + +use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses, ToolWrapper}; + +pub fn main() { + let mut args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ wrapper was called. Expected {dir:?} to end with c or cxx"), + }; + + dir.pop(); + + // Must be always present, even without --libafl + args.push("-fsanitize-coverage=trace-pc-guard,trace-cmp".into()); + + let mut cc = ClangWrapper::new(); + + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + cc.add_pass(LLVMPasses::AutoTokens); + + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. 
+ .silence(true) + // add arguments only if --libafl or --libafl-no-link are present + .need_libafl_arg(true) + .parse_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "fuzzbench") + .add_pass(LLVMPasses::CmpLogRtn) + .add_pass(LLVMPasses::Tables) + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs b/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..dabd22971a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main(); +} diff --git a/fuzzers/fuzzbench_tables/src/lib.rs b/fuzzers/fuzzbench_tables/src/lib.rs new file mode 100644 index 0000000000..d18b3e8f4a --- /dev/null +++ b/fuzzers/fuzzbench_tables/src/lib.rs @@ -0,0 +1,405 @@ +//! A singlethreaded libfuzzer-like fuzzer that can auto-restart. 
+use mimalloc::MiMalloc; +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +use core::{cell::RefCell, time::Duration}; +#[cfg(unix)] +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::{ + env, + fs::{self, File, OpenOptions}, + io::{self, Read, Write}, + path::PathBuf, + process, +}; + +use clap::{Arg, Command}; +use libafl::{ + bolts::{ + current_nanos, current_time, + os::dup2, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + AsSlice, + }, + corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus}, + events::SimpleRestartingEventManager, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::{BytesInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, + StdMOptMutator, StdScheduledMutator, Tokens, + }, + observers::{ConstMapObserver, HitcountsMapObserver, TimeObserver}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, + }, + stages::{ + calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage, + TracingStage, + }, + state::{HasCorpus, HasMetadata, StdState}, + Error, +}; +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +use libafl_targets::autotokens; +use libafl_targets::{ + libfuzzer_initialize, libfuzzer_test_one_input, std_edges_map_observer, CmpLogObserver, + TABLES_MAP, TABLES_MAP_SIZE, +}; +#[cfg(unix)] +use nix::{self, unistd::dup}; + +/// The fuzzer main (as `no_mangle` C function) +#[no_mangle] +pub fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + let res = match Command::new(env!("CARGO_PKG_NAME")) + .version(env!("CARGO_PKG_VERSION")) + .author("AFLplusplus team") + .about("LibAFL-based fuzzer for 
Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')"), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')"), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing"), + ) + .arg( + Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg(Arg::new("remaining")) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + if let Some(filenames) = res.get_many::("remaining") { + let filenames: Vec<&str> = filenames.map(String::as_str).collect(); + if !filenames.is_empty() { + run_testcases(&filenames); + return; + } + } + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. 
+ let mut out_dir = PathBuf::from( + res.get_one::("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.get_one::("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.get_one::("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.get_one::("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.get_one::("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + fuzz(out_dir, crashes, &in_dir, tokens, &logfile, timeout) + .expect("An error occurred while fuzzing"); +} + +fn run_testcases(filenames: &[&str]) { + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. 
+ let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + println!( + "You are not fuzzing, just executing {} testcases", + filenames.len() + ); + for fname in filenames { + println!("Executing {fname}"); + + let mut file = File::open(fname).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + libfuzzer_test_one_input(&buffer); + } +} + +/// The actual fuzzer +#[allow(clippy::too_many_lines)] +fn fuzz( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: &PathBuf, + tokenfile: Option, + logfile: &PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{s}").unwrap(); + #[cfg(windows)] + println!("{s}"); + writeln!(log.borrow_mut(), "{:?} {s}", current_time()).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. 
+ Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {err}"); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }); + + let tables_observer = + ConstMapObserver::::new("tables", unsafe { &mut TABLES_MAP }); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let cmplog_observer = CmpLogObserver::new("cmplog", true); + + let map_feedback = MaxMapFeedback::tracking(&edges_observer, true, false); + + let tables_feedback = MaxMapFeedback::new(&tables_observer); + + let calibration = CalibrationStage::new(&map_feedback); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let mut feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + map_feedback, + tables_feedback, + // Time feedback, this one does not need a feedback state + TimeFeedback::with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let mut objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryOnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // The feedbacks can report the data that should persist in the State. 
+ &mut feedback, + // Same for objective feedbacks + &mut objective, + ) + .unwrap() + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new( + &mut state, + havoc_mutations().merge(tokens_mutations()), + 7, + 5, + )?; + + let power = StdPowerMutationalStage::new(mutator); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new(StdWeightedScheduler::with_schedule( + &mut state, + &edges_observer, + Some(PowerSchedule::FAST), + )); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, tables_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + timeout, + ); + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(TimeoutExecutor::new( + InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + // Give it more time! + timeout * 10, + )); + + // The order of the stages matter! 
+ let mut stages = tuple_list!(calibration, tracing, i2s, power); + + // Read tokens + if state.metadata_map().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks.add_from_file(tokenfile)?; + } + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + { + toks += autotokens()?; + } + + if !toks.is_empty() { + state.add_metadata(toks); + } + } + + // In case the corpus is empty (on first run), reset + if state.must_load_initial_inputs() { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target ouput (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + dup2(null_fd, io::stderr().as_raw_fd())?; + } + // reopen file to make sure we're at the end + log.replace(OpenOptions::new().append(true).create(true).open(logfile)?); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} diff --git a/fuzzers/fuzzbench_tables/stub_rt.c b/fuzzers/fuzzbench_tables/stub_rt.c new file mode 100644 index 0000000000..3e9c1a5c22 --- /dev/null +++ b/fuzzers/fuzzbench_tables/stub_rt.c @@ -0,0 +1,29 @@ +#include + +__attribute__ ((weak)) void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { +} + +__attribute__ ((weak)) void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { +} + +__attribute__ ((weak)) void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { +} + +__attribute__ ((weak)) void __cmplog_rtn_gcc_stdstring_cstring(uint8_t *stdstring, uint8_t *cstring) { +} + +__attribute__ ((weak)) void __cmplog_rtn_gcc_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { +} + +__attribute__ ((weak)) void __cmplog_rtn_llvm_stdstring_cstring(uint8_t 
*stdstring, uint8_t *cstring) { +} + +__attribute__ ((weak)) void __cmplog_rtn_llvm_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { +} + +__attribute__ ((weak)) void __libafl_tables_transition(uint32_t arg1, uint32_t arg2) { +} + +extern void libafl_main(void); + +int main(int argc, char **argv) { libafl_main(); return 0; } diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index a7c8509546..fab54b98c0 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -60,7 +60,7 @@ impl LLVMPasses { LLVMPasses::CoverageAccounting => PathBuf::from(env!("OUT_DIR")) .join(format!("coverage-accounting-pass.{}", dll_extension())), LLVMPasses::Tables => { - PathBuf::from(env!("OUT_DIR")).join(format!("tabes-pass.{}", dll_extension())) + PathBuf::from(env!("OUT_DIR")).join(format!("tables-pass.{}", dll_extension())) } LLVMPasses::DumpCfg => { PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension())) diff --git a/libafl_cc/src/tables-pass.cc b/libafl_cc/src/tables-pass.cc index 4949f34806..0ed4b823ec 100644 --- a/libafl_cc/src/tables-pass.cc +++ b/libafl_cc/src/tables-pass.cc @@ -1,5 +1,5 @@ /* - LibAFL - Coverage accounting LLVM pass + LibAFL - Parsing tables coverage LLVM pass -------------------------------------------------- Written by Andrea Fioraldi @@ -163,7 +163,7 @@ bool TamingParsingTables::runOnModule(Module &M) { Value *IDX = *GEP->idx_begin(); IDX = recurseCast(IDX); - if ((LI = dyn_cast(IDX)) && loads.contains(LI)) { + if ((LI = dyn_cast(IDX)) && loads.find(LI) != loads.end()) { geps.insert(GEP); } } else if ((ST = dyn_cast(&I))) { @@ -174,7 +174,7 @@ bool TamingParsingTables::runOnModule(Module &M) { if ((GL = dyn_cast(VAL))) { V = GL->getPointerOperand(); - if (V == nullptr || !geps.contains(V)) { continue; } + if (V == nullptr || geps.find(V) == geps.end()) { continue; } } else { continue; } @@ -216,14 +216,14 @@ bool TamingParsingTables::runOnModule(Module &M) { } #ifndef USE_NEW_PM -static void 
registerAFLPass(const PassManagerBuilder &, +static void registerTablesPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new TamingParsingTables()); } -static RegisterStandardPasses RegisterAFLPass( - PassManagerBuilder::EP_OptimizerLast, registerAFLPass); +static RegisterStandardPasses RegisterTablesPass( + PassManagerBuilder::EP_OptimizerLast, registerTablesPass); -static RegisterStandardPasses RegisterAFLPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); +static RegisterStandardPasses RegisterTablesPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerTablesPass); #endif diff --git a/libafl_targets/src/tables.rs b/libafl_targets/src/tables.rs index b0a6f5d19d..c6ab036205 100644 --- a/libafl_targets/src/tables.rs +++ b/libafl_targets/src/tables.rs @@ -27,6 +27,6 @@ fn splitmix64(target: u64, seed: u64) -> u64 { pub extern "C" fn __libafl_tables_transition(cur: u32, next: u32) { let hash = splitmix64(merge_u32(cur, next), 52) as usize % TABLES_MAP_SIZE; unsafe { - TABLES_MAP[hash] += 1; + TABLES_MAP[hash] = 1; } } From e46527465ff68a7890747881cc00022f5d016da1 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 12 Jul 2023 15:13:09 +0200 Subject: [PATCH 3/5] fmt --- libafl_cc/src/tables-pass.cc | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/libafl_cc/src/tables-pass.cc b/libafl_cc/src/tables-pass.cc index 0ed4b823ec..2310d98569 100644 --- a/libafl_cc/src/tables-pass.cc +++ b/libafl_cc/src/tables-pass.cc @@ -118,7 +118,8 @@ char TamingParsingTables::ID = 0; #endif #ifdef USE_NEW_PM -PreservedAnalyses TamingParsingTables::run(Module &M, ModuleAnalysisManager &MAM) { +PreservedAnalyses TamingParsingTables::run(Module &M, + ModuleAnalysisManager &MAM) { #else bool TamingParsingTables::runOnModule(Module &M) { #endif @@ -130,9 +131,10 @@ bool TamingParsingTables::runOnModule(Module &M) { IntegerType *Int16Ty = IntegerType::getInt16Ty(C); IntegerType *Int8Ty = 
IntegerType::getInt8Ty(C); IntegerType *Int1Ty = IntegerType::getInt1Ty(C); - Type *VoidTy = Type::getVoidTy(C); + Type *VoidTy = Type::getVoidTy(C); - FunctionCallee LogFunc = M.getOrInsertFunction("__libafl_tables_transition", VoidTy, Int32Ty, Int32Ty); + FunctionCallee LogFunc = M.getOrInsertFunction("__libafl_tables_transition", + VoidTy, Int32Ty, Int32Ty); #ifdef USE_NEW_PM auto PA = PreservedAnalyses::all(); @@ -162,7 +164,7 @@ bool TamingParsingTables::runOnModule(Module &M) { // TODO handle multiple idxs Value *IDX = *GEP->idx_begin(); IDX = recurseCast(IDX); - + if ((LI = dyn_cast(IDX)) && loads.find(LI) != loads.end()) { geps.insert(GEP); } @@ -171,7 +173,7 @@ bool TamingParsingTables::runOnModule(Module &M) { Value *VAL = recurseCast(ST->getValueOperand()); LoadInst *GL = nullptr; Value *V = nullptr; - + if ((GL = dyn_cast(VAL))) { V = GL->getPointerOperand(); if (V == nullptr || geps.find(V) == geps.end()) { continue; } @@ -190,17 +192,19 @@ bool TamingParsingTables::runOnModule(Module &M) { recurseCast(ST->getPointerOperand())) continue; - std::string location = std::string("UNKNOWN"); - if (DILocation *Loc = GEP->getDebugLoc().get()) { - location = std::string(Loc->getFilename().data()) + - std::string(":") + std::to_string(Loc->getLine()); + if (Debug) { + std::string location = std::string("UNKNOWN"); + if (DILocation *Loc = GEP->getDebugLoc().get()) { + location = std::string(Loc->getFilename().data()) + + std::string(":") + std::to_string(Loc->getLine()); + } + + errs() << "FOUND " << location << "\n\t" << *LI << "\n\t" << *GEP + << "\n\t" << *ST << "\n\n"; } - errs() << "FOUND " << location << "\n\t" << *LI << "\n\t" << *GEP - << "\n\t" << *ST << "\n\n"; - IRBuilder<> IRB(ST); - Value *A1 = IRB.CreateIntCast(LI, Int32Ty, false); + Value *A1 = IRB.CreateIntCast(LI, Int32Ty, false); Value *A2 = IRB.CreateIntCast(ST->getValueOperand(), Int32Ty, false); IRB.CreateCall(LogFunc, {A1, A2}); } @@ -217,7 +221,7 @@ bool 
TamingParsingTables::runOnModule(Module &M) { #ifndef USE_NEW_PM static void registerTablesPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { + legacy::PassManagerBase &PM) { PM.add(new TamingParsingTables()); } From 6c6970e4819e43785c366fe007fe5182bf3fd005 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 12 Jul 2023 15:30:57 +0200 Subject: [PATCH 4/5] clippy --- libafl_targets/src/tables.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libafl_targets/src/tables.rs b/libafl_targets/src/tables.rs index c6ab036205..9361ab9892 100644 --- a/libafl_targets/src/tables.rs +++ b/libafl_targets/src/tables.rs @@ -6,14 +6,14 @@ use crate::TABLES_MAP_SIZE; pub static mut TABLES_MAP: [u8; TABLES_MAP_SIZE] = [0; TABLES_MAP_SIZE]; fn merge_u32(a: u32, b: u32) -> u64 { - (a as u64) << 32 + (b as u64) + (u64::from(a) << 32) + u64::from(b) } -/// From https://sair.synerise.com/efficient-integer-pairs-hashing/ +/// From <https://sair.synerise.com/efficient-integer-pairs-hashing/> fn splitmix64(target: u64, seed: u64) -> u64 { let sp_step = 0x9E3779B97F4A7C15_u64; let mut out = target; - out = out + seed * sp_step; + out += seed * sp_step; out ^= out >> 30; out *= 0xBF58476D1CE4E5B9_u64; out ^= out >> 27; From bf0a037d63b607646d82ca7113e5973321fc1ec1 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Tue, 18 Jul 2023 13:47:50 +0200 Subject: [PATCH 5/5] fix --- libafl/src/monitors/multi.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/libafl/src/monitors/multi.rs b/libafl/src/monitors/multi.rs index 5eb6db8db8..4add6681a2 100644 --- a/libafl/src/monitors/multi.rs +++ b/libafl/src/monitors/multi.rs @@ -3,7 +3,10 @@ #[cfg(feature = "introspection")] use alloc::string::ToString; use alloc::{string::String, vec::Vec}; -use core::{fmt::Write, time::Duration}; +use core::{ + fmt::{Debug, Write}, + time::Duration, +}; use crate::{ bolts::{current_time, format_duration_hms, ClientId}, @@ -11,7 +14,7 @@ use crate::{ }; /// Tracking monitor during fuzzing and 
display both per-client and cumulative info. -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct MultiMonitor<F> where F: FnMut(String), @@ -21,6 +24,18 @@ where client_stats: Vec, } +impl<F> Debug for MultiMonitor<F> +where + F: FnMut(String), +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("MultiMonitor") + .field("start_time", &self.start_time) + .field("client_stats", &self.client_stats) + .finish_non_exhaustive() + } +} + impl<F> Monitor for MultiMonitor<F> where F: FnMut(String),