From c7303ace533c9ba4c0538072bb70cff167834e21 Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Wed, 15 May 2024 16:50:20 +0200 Subject: [PATCH] Static analysis pass (#2178) * copy * fmt * aa * a * Goodbye z3, i hope i won't build you forever * doc * fmt --- fuzzers/libfuzzer_libpng_cmin/Cargo.toml | 1 + fuzzers/libfuzzer_libpng_cmin/Makefile.toml | 4 +- .../src/bin/libafl_cc.rs | 1 + fuzzers/libfuzzer_libpng_cmin/src/lib.rs | 1 + libafl/Cargo.toml | 2 +- libafl_cc/src/profiling.cc | 881 ++++++++++++++++++ scripts/fmt_all.sh | 2 +- 7 files changed, 888 insertions(+), 4 deletions(-) create mode 100644 libafl_cc/src/profiling.cc diff --git a/fuzzers/libfuzzer_libpng_cmin/Cargo.toml b/fuzzers/libfuzzer_libpng_cmin/Cargo.toml index f4435c1d8c..45f526b1de 100644 --- a/fuzzers/libfuzzer_libpng_cmin/Cargo.toml +++ b/fuzzers/libfuzzer_libpng_cmin/Cargo.toml @@ -21,6 +21,7 @@ cc = { version = "1.0", features = ["parallel"] } which = "4.4" [dependencies] +env_logger = "0.10" libafl = { path = "../../libafl/", features = ["default", "cmin"] } # libafl = { path = "../../libafl/", features = ["default"] } libafl_bolts = { path = "../../libafl_bolts/" } diff --git a/fuzzers/libfuzzer_libpng_cmin/Makefile.toml b/fuzzers/libfuzzer_libpng_cmin/Makefile.toml index 29d66bea15..b590ba024b 100644 --- a/fuzzers/libfuzzer_libpng_cmin/Makefile.toml +++ b/fuzzers/libfuzzer_libpng_cmin/Makefile.toml @@ -104,7 +104,7 @@ windows_alias = "unsupported" [tasks.fuzzer_unix] command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx" -args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}", "-lm", "-lz"] +args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}", "-lm", "-lz", "-lz3"] dependencies = [ "lib", "cxx", "cc" ] # Crashing Harness @@ -115,7 +115,7 @@ windows_alias = "unsupported" 
[tasks.fuzzer_crash_unix] command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx" -args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}_crash", "-lm", "-lz"] +args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}_crash", "-lm", "-lz", "-lz3"] dependencies = [ "crash_lib", "crash_cxx", "crash_cc" ] # Run the fuzzer diff --git a/fuzzers/libfuzzer_libpng_cmin/src/bin/libafl_cc.rs b/fuzzers/libfuzzer_libpng_cmin/src/bin/libafl_cc.rs index 307fbc1154..0769acce0e 100644 --- a/fuzzers/libfuzzer_libpng_cmin/src/bin/libafl_cc.rs +++ b/fuzzers/libfuzzer_libpng_cmin/src/bin/libafl_cc.rs @@ -18,6 +18,7 @@ pub fn main() { .expect("Failed to parse the command line") .link_staticlib(&dir, "libfuzzer_libpng") .add_arg("-fsanitize-coverage=trace-pc-guard") + .add_arg("-lz3") .run() .expect("Failed to run the wrapped compiler") { diff --git a/fuzzers/libfuzzer_libpng_cmin/src/lib.rs b/fuzzers/libfuzzer_libpng_cmin/src/lib.rs index e3b660bfe7..e024dcdbe8 100644 --- a/fuzzers/libfuzzer_libpng_cmin/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_cmin/src/lib.rs @@ -44,6 +44,7 @@ static GLOBAL: MiMalloc = MiMalloc; #[cfg(not(test))] #[no_mangle] pub extern "C" fn libafl_main() { + env_logger::init(); // Registry the metadata types used in this fuzzer // Needed only on no_std // unsafe { RegistryBuilder::register::(); } diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index fac0491680..1d9b624b38 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -179,7 +179,7 @@ tokio = { version = "1.28.1", optional = true, features = ["sync", "net", "rt", wait-timeout = { version = "0.2", optional = true } # used by CommandExecutor to wait for child process -z3 = { version = "0.12.0", features = ["static-link-z3"], optional = true } # for concolic mutation +z3 = { version = "0.12.0", optional = true } # for 
concolic mutation concat-idents = { version = "1.1.3", optional = true } diff --git a/libafl_cc/src/profiling.cc b/libafl_cc/src/profiling.cc new file mode 100644 index 0000000000..68b82ef914 --- /dev/null +++ b/libafl_cc/src/profiling.cc @@ -0,0 +1,881 @@
+/*
+   LibAFL - Profiling LLVM pass
+   --------------------------------------------------
+
+   Written by Dongjia Zhang
+
+   Copyright 2022-2023 AFLplusplus Project. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+// This llvm pass is for conducting static analysis.
+
+#include
+#include
+#include
+#ifndef _WIN32
+  #include
+  #include
+#else
+  #include
+#endif
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+// LLVM Includes
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/IRBuilder.h"
+#if USE_NEW_PM
+  #include "llvm/IR/PassManager.h"
+  #include "llvm/Passes/PassBuilder.h"
+  #include "llvm/Passes/PassPlugin.h"
+#else
+  #include "llvm/IR/LegacyPassManager.h"
+#endif
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/FileSystem.h"
+
+// Other includes
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+namespace {
+// Module pass that collects per-function statistics, struct relations and CFG data and dumps them as JSON.
+#if USE_NEW_PM
+class AnalysisPass : public PassInfoMixin {
+ public:
+  AnalysisPass() {
+#else
+class AnalysisPass : public ModulePass {
+ public:
+  static char ID;
+
+  AnalysisPass() : ModulePass(ID) {
+#endif
+  }
+
+#if USE_NEW_PM
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+#else
+  bool runOnModule(Module &M) override;
+#endif
+
+ protected:
+  DenseMap bb_to_cur_loc;  // basic block -> index given while walking its function (restarts at 0 per function)
+  DenseMap entry_bb;       // function name -> entry basic block
+  DenseMap> calls_in_bb;   // basic block -> names of the directly called, non-intrinsic functions
+  DenseMap> structLinks;   // struct name -> names of struct types reached through its element types
+  DenseMap> structDesc;    // struct name -> number of elements per Type::getTypeID()
+  // The type name is not in the memory, so create std::string impromptu
+
+ private:
+  uint32_t travereScope(DIScope *bottom) {  // number of getScope() hops taken upwards from `bottom`
+    uint32_t level = 0;
+    for (auto scope = bottom; scope && !isa(scope);
+         scope = scope->getScope()) {
+      level += 1;
+    }
+    // The walk also stops when the scope chain runs out, so isa() is never evaluated on a null scope.
+    return level;
+  }
+  // Renders `typ` through Type::print() and returns the resulting text.
+  std::string typeWriter(Type *typ) {
+    // Because there's no string object for the type in the memory
+    // I have to build the string myself
+    std::string              type_str;
+    llvm::raw_string_ostream rso(type_str);
+    typ->print(rso);
+    return rso.str();
+  }
+  // True when the callee has one of the memcmp-like names below and an i32(ptr, ptr, integer) signature. `cb` must be a direct call (non-null getCalledFunction()).
+  bool isMemCmp(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto FuncName = cb->getCalledFunction()->getName().str();
+
+    bool isMemcmp = (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") ||
+                     !FuncName.compare("CRYPTO_memcmp") ||
+                     !FuncName.compare("OPENSSL_memcmp") ||
+                     !FuncName.compare("memcmp_const_time") ||
+                     !FuncName.compare("memcmpct"));
+    isMemcmp &= FT->getNumParams() == 3 &&
+                FT->getReturnType()->isIntegerTy(32) &&
+                FT->getParamType(0)->isPointerTy() &&
+                FT->getParamType(1)->isPointerTy() &&
+                FT->getParamType(2)->isIntegerTy();
+    return isMemcmp;
+  }
+  // True when the callee has one of the two-argument string compare/search names below and an i32(i8*, i8*) signature. `cb` must be a direct call.
+  bool isStrcmp(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto FuncName = cb->getCalledFunction()->getName().str();
+
+    bool isStrcmp =
+        (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") ||
+         !FuncName.compare("xmlStrEqual") || !FuncName.compare("g_strcmp0") ||
+         !FuncName.compare("curl_strequal") ||
+         !FuncName.compare("strcsequal") || !FuncName.compare("strcasecmp") ||
+         !FuncName.compare("stricmp") || !FuncName.compare("ap_cstr_casecmp") ||
+         !FuncName.compare("OPENSSL_strcasecmp") ||
+         !FuncName.compare("xmlStrcasecmp") ||
+         !FuncName.compare("g_strcasecmp") ||
+         !FuncName.compare("g_ascii_strcasecmp") ||
+         !FuncName.compare("Curl_strcasecompare") ||
+         !FuncName.compare("Curl_safe_strcasecompare") ||
+         !FuncName.compare("cmsstrcasecmp") || !FuncName.compare("strstr") ||
+         !FuncName.compare("g_strstr_len") ||
+         !FuncName.compare("ap_strcasestr") || !FuncName.compare("xmlStrstr") ||
+         !FuncName.compare("xmlStrcasestr") ||
+         !FuncName.compare("g_str_has_prefix") ||
+         !FuncName.compare("g_str_has_suffix"));
+    isStrcmp &=
+        FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+        FT->getParamType(0) == FT->getParamType(1) &&
+        FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+
+    return isStrcmp;
+  }
+  // True when the callee has one of the length-bounded string compare names below and an i32(i8*, i8*, integer) signature. `cb` must be a direct call.
+  bool isStrncmp(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto FuncName = cb->getCalledFunction()->getName().str();
+
+    bool isStrncmp =
+        (!FuncName.compare("strncmp") || !FuncName.compare("xmlStrncmp") ||
+         !FuncName.compare("curl_strnequal") ||
+         !FuncName.compare("strncasecmp") || !FuncName.compare("strnicmp") ||
+         !FuncName.compare("ap_cstr_casecmpn") ||
+         !FuncName.compare("OPENSSL_strncasecmp") ||
+         !FuncName.compare("xmlStrncasecmp") ||
+         !FuncName.compare("g_ascii_strncasecmp") ||
+         !FuncName.compare("Curl_strncasecompare") ||
+         !FuncName.compare("g_strncasecmp"));
+    isStrncmp &=
+        FT->getNumParams() == 3 && FT->getReturnType()->isIntegerTy(32) &&
+        FT->getParamType(0) == FT->getParamType(1) &&
+        FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()) &&
+        FT->getParamType(2)->isIntegerTy();
+    return isStrncmp;
+  }
+  // True when the callee name contains both mangled-name fragments below and its first two parameters share one pointer type. `cb` must be a direct call.
+  bool isGccStdStringStdString(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto Callee = cb->getCalledFunction();
+    bool isGccStdStringStdString =
+        Callee->getName().find("__is_charIT_EE7__value") != std::string::npos &&
+        Callee->getName().find("St7__cxx1112basic_stringIS2_St11char_traits") !=
+            std::string::npos &&
+        FT->getNumParams() >= 2 && FT->getParamType(0) == FT->getParamType(1) &&
+        FT->getParamType(0)->isPointerTy();
+    return isGccStdStringStdString;
+  }
+  // True when the callee name contains the mangled-name fragment below and its first two parameters are pointers. `cb` must be a direct call.
+  bool isGccStdStringCString(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto Callee = cb->getCalledFunction();
+
+    bool isGccStdStringCString =
+        Callee->getName().find(
+            "St7__cxx1112basic_stringIcSt11char_"
+            "traitsIcESaIcEE7compareEPK") != std::string::npos &&
+        FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+        FT->getParamType(1)->isPointerTy();
+    return isGccStdStringCString;
+  }
+  // True when the callee name contains all three mangled-name fragments below and its first two parameters are pointers. `cb` must be a direct call.
+  bool isLlvmStdStringStdString(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto Callee = cb->getCalledFunction();
+
+    bool isLlvmStdStringStdString =
+        Callee->getName().find("_ZNSt3__1eqI") != std::string::npos &&
+        Callee->getName().find("_12basic_stringI") != std::string::npos &&
+        Callee->getName().find("_11char_traits") != std::string::npos &&
+        FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+        FT->getParamType(1)->isPointerTy();
+    return isLlvmStdStringStdString;
+  }
+  // Same as isLlvmStdStringStdString but without the "_11char_traits" fragment, so it accepts a superset of those callees.
+  bool isLlvmStdStringCString(Module &M, CallBase *cb) {
+    auto FT = cb->getCalledFunction()->getFunctionType();
+    auto Callee = cb->getCalledFunction();
+
+    bool isLlvmStdStringCString =
+        Callee->getName().find("_ZNSt3__1eqI") != std::string::npos &&
+        Callee->getName().find("_12basic_stringI") != std::string::npos &&
+        FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+        FT->getParamType(1)->isPointerTy();
+
+    return isLlvmStdStringCString;
+  }
+  // True when the function name starts with "llvm.".
+  bool isLLVMIntrinsicFn(StringRef &n) {
+    // Not interested in these LLVM's functions
+    if (n.startswith("llvm.")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+  // True when `n` is exactly one of the I/O, memory, string or number-parsing function names listed below.
+  bool isMemorySensitiveFn(StringRef &n) {
+    if (n.equals("write") || n.equals("read") || n.equals("fgets") ||
+        n.equals("memcmp") || n.equals("memcpy") || n.equals("mempcpy") ||
+        n.equals("memmove") || n.equals("memset") || n.equals("memchr") ||
+        n.equals("memrchr") || n.equals("memmem") || n.equals("bzero") ||
+        n.equals("explicit_bzero") || n.equals("bcmp") || n.equals("strchr") ||
+        n.equals("strrchr") || n.equals("strcasecmp") || n.equals("strncat") ||
+        n.equals("strerror") || n.equals("strncasecmp") || n.equals("strcat") ||
+        n.equals("strcmp") || n.equals("strspn") || n.equals("strncmp") ||
+        n.equals("strcpy") || n.equals("strncpy") || n.equals("strcoll") ||
+        n.equals("stpcpy") || n.equals("strdup") || n.equals("strlen") ||
+        n.equals("strxfrm") || n.equals("strtok") || n.equals("strnlen") ||
+        n.equals("strstr") || n.equals("strcasestr") || n.equals("strcspn") ||
+        n.equals("strpbrk") || n.equals("atoi") || n.equals("atol") ||
+        n.equals("atoll") || n.equals("wcslen") || n.equals("wcscpy") ||
+        n.equals("wcscmp")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+  // True when `n` is exactly one of the allocation function names listed below.
+  bool isMallocFn(StringRef &n) {
+    if (n.equals("malloc") || n.equals("calloc") || n.equals("realloc") ||
+        n.equals("reallocarray") || n.equals("memalign") ||
+        n.equals("__libc_memalign") || n.equals("aligned_alloc") ||
+        n.equals("posix_memalign") || n.equals("valloc") ||
+        n.equals("pvalloc") || n.equals("mmap")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+  // True when `n` is exactly "free", "cfree" or "munmap".
+  bool isFreeFn(StringRef &n) {
+    if (n.equals("free") || n.equals("cfree") || n.equals("munmap")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+  // True when `n` is exactly one of the mangled operator new / new[] names listed below.
+  bool isCppNewFn(StringRef &n) {
+    // operator new[](unsigned long)
+    // operator new[](unsigned long, std::nothrow_t const&)
+    // operator new[](unsigned long, std::align_val_t)
+    // operator new[](unsigned long, std::align_val_t, std::nothrow_t const&)
+    // operator new(unsigned long)
+    // operator new(unsigned long, std::nothrow_t const&)
+    // operator new(unsigned long, std::align_val_t)
+    // operator new(unsigned long, std::align_val_t, std::nothrow_t const&)
+
+    if (n.equals("_Znam") || n.equals("_ZnamRKSt9nothrow_t") ||
+        n.equals("_ZnamSt11align_val_t") ||
+        n.equals("_ZnamSt11align_val_tRKSt9nothrow_t") || n.equals("_Znwm") ||
+        n.equals("_ZnwmRKSt9nothrow_t") || n.equals("_ZnwmSt11align_val_t") ||
+        n.equals("_ZnwmSt11align_val_tRKSt9nothrow_t")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+  // True when `n` is exactly one of the mangled operator delete / delete[] names listed below.
+  bool isCppDelete(StringRef &n) {
+    // operator delete[](void*)
+    // operator delete[](void*, unsigned long)
+    // operator delete[](void*, unsigned long, std::align_val_t)
+    // operator delete[](void*, std::nothrow_t const&)
+    // operator delete[](void*, std::align_val_t)
+    // operator delete[](void*, std::align_val_t, std::nothrow_t const&)
+    // operator delete(void*)
+    // operator delete(void*, unsigned long)
+    // operator delete(void*, unsigned long, std::align_val_t)
+    // operator delete(void*, std::nothrow_t const&)
+    // operator delete(void*, std::align_val_t)
+    // operator delete(void*, std::align_val_t, std::nothrow_t const&)
+
+    if (n.equals("_ZdaPv") || n.equals("_ZdaPvm") ||
+        n.equals("_ZdaPvmSt11align_val_t") ||
+        n.equals("_ZdaPvRKSt9nothrow_t") || n.equals("_ZdaPvSt11align_val_t") ||
+        n.equals("_ZdaPvSt11align_val_tRKSt9nothrow_t") || n.equals("_ZdlPv") ||
+        n.equals("_ZdlPvm") || n.equals("_ZdlPvmSt11align_val_t") ||
+        n.equals("_ZdlPvRKSt9nothrow_t") || n.equals("_ZdlPvSt11align_val_t") ||
+        n.equals("_ZdlPvSt11align_val_tRKSt9nothrow_t")
+
+    ) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+};
+
+}  // namespace
+// True when `name` can be opened for reading.
+inline bool file_exist(const std::string &name) {
+  std::ifstream f(name.c_str());
+  return f.good();
+}
+
+#if USE_NEW_PM
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo() {
+  return {LLVM_PLUGIN_API_VERSION, "AnalysisPass", "v0.1",
+          /* lambda to insert our pass into the pass pipeline.
*/
+          [](PassBuilder &PB) {
+
+  #if LLVM_VERSION_MAJOR <= 13
+            using OptimizationLevel = typename PassBuilder::OptimizationLevel;
+  #endif
+            PB.registerOptimizerLastEPCallback(
+                [](ModulePassManager &MPM, OptimizationLevel OL) {
+                  MPM.addPass(AnalysisPass());
+                });
+          }};
+}
+#else
+char AnalysisPass::ID = 0;
+#endif
+// Pass entry point: analyses module M once and writes <path>.json, <path>.lks and <path>.cfg under $ANALYSIS_OUTPUT_PATH. The IR is only read.
+#if USE_NEW_PM
+PreservedAnalyses AnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) {
+#else
+bool AnalysisPass::runOnModule(Module &M) {
+
+#endif
+  // <path> is the resolved path of the module's source file with every '/' replaced by '#'.
+  std::string            relFilename = M.getSourceFileName();
+  llvm::SmallString<128> FilenameVec = StringRef(relFilename);
+  llvm::SmallString<128> RealPath;
+  llvm::sys::fs::real_path(FilenameVec, RealPath);
+  std::filesystem::path fp{std::string(RealPath)};
+  std::string           genericFilePath = fp.generic_string();
+
+  std::replace(genericFilePath.begin(), genericFilePath.end(), '/', '#');
+
+  /*
+  std::ifstream ifs;
+  ifs.open("/out/whitelist.txt");
+
+  if (ifs.fail()) { abort(); }
+  std::string srcfile;
+  std::vector srcList;
+  while (ifs >> srcfile) {
+    srcList.push_back(srcfile);
+  }
+
+  bool run = false;
+
+  for (std::string S : srcList) {
+    if (S == Filename) {
+      outs() << "Accept " << Filename << "\n";
+      run = true;
+    }
+  }
+  */
+  bool run = true;
+  // "/out/.<path>.json" acts as a marker: if it already exists the analysis is skipped, otherwise it is created here.
+  bool done_already = file_exist("/out/." + genericFilePath + ".json");
+  if (done_already) {
+    run = false;
+  } else {
+    std::ofstream out_lock("/out/." + genericFilePath + ".json");
+  }
+
+  if (run) {
+    outs() << "Analysis on " + genericFilePath << "\n";
+    LLVMContext &Ctx = M.getContext();
+    auto         moduleName = M.getName().str();
+    // For every identified struct type: histogram its element type IDs and record the struct types its elements lead to.
+    for (auto ST : M.getIdentifiedStructTypes()) {
+      std::unordered_map types;
+      for (auto T : ST->elements()) {
+        types[T->getTypeID()] += 1;
+        auto ty = T;
+        while (true) {
+          // Peel pointer levels until a non-pointer type is reached
+          if (ty->isPointerTy()) {
+            ty = ty->getPointerElementType();
+            continue;
+          } else if (ty->isStructTy()) {
+            structLinks[ST->getStructName()].push_back(ty->getStructName());
+          }
+          break;
+        }
+      }
+
+      structDesc[ST->getStructName()] = types;
+    }
+    nlohmann::json res;
+    // Per-function statistics; all counters below restart for every defined function.
+    for (auto &F : M) {
+      if (F.isDeclaration()) { continue; }
+
+      DenseMap           APIcalls;
+      DenseMap           heapAPIs;
+      DenseMap           memoryAPIs;
+      std::unordered_map nestedLevel;
+      std::unordered_map cmpGlobals;
+      std::unordered_map cmpNonZeros;
+      DenseMap           structWrites;
+      std::unordered_map structArgs;
+      std::unordered_map cmpTypes;
+      std::unordered_map callArgTypes;
+      std::unordered_map storeTypes;
+      std::unordered_map loadTypes;
+      std::unordered_map allocaTypes;
+      std::unordered_map cmpComplexity;
+
+      unsigned bb_cnt = 0;
+      unsigned inst_cnt = 0;
+      unsigned edges_cnt = 0;
+
+      unsigned call_cnt = 0;
+      unsigned cmp_cnt = 0;
+      unsigned load_cnt = 0;
+      unsigned store_cnt = 0;
+      unsigned alloca_cnt = 0;
+      unsigned branch_cnt = 0;
+      unsigned binary_op_cnt = 0;
+
+      entry_bb[F.getName()] = &F.getEntryBlock();
+      for (auto &BB : F) {
+        bb_to_cur_loc[&BB] = bb_cnt;
+        bb_cnt++;
+        for (auto &IN : BB) {
+          /// Check data types
+
+          auto meta = IN.getMetadata(0);
+          if (meta) {
+            DILocation *diloc = nullptr;
+            if ((diloc = dyn_cast(meta))) {
+              auto     scope = diloc->getScope();
+              uint32_t nested_level = travereScope(scope);
+              nestedLevel[nested_level] += 1;
+            }
+          }
+
+          CallBase       *callBase = nullptr;
+          CmpInst        *cmpInst = nullptr;
+          LoadInst       *loadInst = nullptr;
+          StoreInst      *storeInst = nullptr;
+          AllocaInst     *allocaInst = nullptr;
+          BranchInst     *branchInst = nullptr;
+          BinaryOperator *binaryOp = nullptr;
+          // Classify the instruction; at most one branch of the chain below applies.
+          if ((binaryOp = dyn_cast(&IN))) {
+            binary_op_cnt++;
+          } else if ((branchInst = dyn_cast(&IN))) {
+            branch_cnt++;
+          } else if ((callBase = dyn_cast(&IN))) {
+            // Only direct calls (non-null callee) are analysed; this F shadows the loop's F.
+            auto F = callBase->getCalledFunction();
+            if (F) {
+              StringRef name = F->getName();
+              if (isLLVMIntrinsicFn(name)) {
+                // just ignore (note: this also skips inst_cnt++ for the call)
+                continue;
+              }
+              APIcalls[name]++;
+              call_cnt++;
+
+              calls_in_bb[&BB].push_back(name);
+              // Check memory related calls
+              if (isMallocFn(name)) {
+                heapAPIs["malloc"]++;
+              } else if (isFreeFn(name)) {
+                heapAPIs["free"]++;
+              } else if (isCppNewFn(name)) {
+                heapAPIs["new"]++;
+              } else if (isCppDelete(name)) {
+                heapAPIs["delete"]++;
+              }
+
+              if (isMemorySensitiveFn(name)) { memoryAPIs[name]++; }
+
+              if (isMemCmp(M, callBase)) {
+                cmpComplexity["mem cmp"]++;
+              } else if (isStrcmp(M, callBase) || isStrncmp(M, callBase) ||
+                         isGccStdStringCString(M, callBase) ||
+                         isGccStdStringStdString(M, callBase) ||
+                         isLlvmStdStringCString(M, callBase) ||
+                         isLlvmStdStringStdString(M, callBase)) {
+                cmpComplexity["str cmp"]++;
+              }
+              // Count the callee's declared parameter types, and those that are structs after peeling pointers.
+              for (auto arg = F->arg_begin(); arg != F->arg_end(); arg++) {
+                auto        arg_ty = arg->getType();
+                std::string type_str = typeWriter(arg_ty);
+                callArgTypes[type_str]++;
+
+                auto ty = arg_ty;
+                while (true) {
+                  // Peel pointer levels until a non-pointer type is reached
+                  if (ty->isPointerTy()) {
+                    ty = ty->getPointerElementType();
+                    continue;
+                  } else if (ty->isStructTy()) {
+                    structArgs[type_str]++;
+                  }
+                  break;
+                }
+              }
+            }
+          } else if ((cmpInst = dyn_cast(&IN))) {
+            FCmpInst *fcmp = nullptr;
+            ICmpInst *icmp = nullptr;
+
+            if ((icmp = dyn_cast(cmpInst))) {
+              cmpComplexity["int cmp"]++;
+            } else if ((fcmp = dyn_cast(cmpInst))) {
+              cmpComplexity["float cmp"]++;
+            }
+            auto typ = cmpInst->getOperand(0)->getType();
+
+            auto     op0 = cmpInst->getOperand(0);
+            auto     op1 = cmpInst->getOperand(1);
+            uint32_t num_constants = 0;
+            uint32_t non_zero_constants = 0;
+
+            Constant *c1 = nullptr;
+            Constant *c2 = nullptr;
+            // Both operands are tested the same way: constant operands, and constant operands that are not zero.
+            if ((c1 = dyn_cast(op0))) {
+              if (!c1->isZeroValue()) { non_zero_constants++; }
+              num_constants++;
+            }
+
+            if ((c2 = dyn_cast(op1))) {
+              if (!c2->isZeroValue()) { non_zero_constants++; }
+              num_constants++;
+            }
+            // cmpGlobals is keyed by the constant-operand count, cmpNonZeros by the non-zero-constant count.
+            cmpGlobals[num_constants]++;
+            cmpNonZeros[non_zero_constants]++;
+            cmpTypes[typeWriter(typ)]++;
+            cmp_cnt++;
+          } else if ((loadInst = dyn_cast(&IN))) {
+            auto typ = loadInst->getType();
+            loadTypes[typeWriter(typ)]++;
+            load_cnt++;
+          } else if ((storeInst = dyn_cast(&IN))) {
+            auto typ = storeInst->getValueOperand()->getType();
+            storeTypes[typeWriter(typ)]++;
+            // Here check writes into structs
+            // check where storeInst stores into
+            auto               op = storeInst->getPointerOperand();
+            GetElementPtrInst *gep = nullptr;
+            if ((gep = dyn_cast(op))) {
+              // The store goes through a GEP; count it when the GEP indexes into a struct type
+              auto typ = gep->getSourceElementType();
+
+              if (typ->isStructTy()) { structWrites[typ->getStructName()]++; }
+            }
+
+            store_cnt++;
+          } else if ((allocaInst = dyn_cast(&IN))) {
+            auto typ = allocaInst->getAllocatedType();
+            allocaTypes[typeWriter(typ)]++;
+            alloca_cnt++;
+          }
+
+          inst_cnt++;
+        }
+        // Every successor of the block terminator is one CFG edge.
+        auto term = BB.getTerminator();
+        edges_cnt += term->getNumSuccessors();
+
+        // Dump everything in this Fn
+      }
+
+      std::string fnname = std::string(F.getName());
+      if (bb_cnt) { res[fnname]["# BBs"] = bb_cnt; }
+
+      if (inst_cnt) { res[fnname]["# insts"] = inst_cnt; }
+
+      if (edges_cnt) { res[fnname]["# edges"] = edges_cnt; }
+
+      if (binary_op_cnt) { res[fnname]["# binaryOp"] = binary_op_cnt; }
+
+      if (call_cnt) { res[fnname]["# call"] = call_cnt; }
+
+      if (cmp_cnt) { res[fnname]["# cmp"] = cmp_cnt; }
+
+      if (load_cnt) { res[fnname]["# load"] = load_cnt; }
+
+      if (store_cnt) { res[fnname]["# store"] = store_cnt; }
+
+      if (alloca_cnt) { res[fnname]["# alloca"] = alloca_cnt; }
+
+      if (branch_cnt) { res[fnname]["# branch"] = branch_cnt; }
+
+      res[fnname]["ABC metric"] =
+          sqrt(alloca_cnt * alloca_cnt + branch_cnt * branch_cnt +
+               call_cnt * call_cnt);
+      res[fnname]["cyclomatic"] = edges_cnt - bb_cnt + 2;
+
+      // "AP": call count per directly called function
+      for (auto record = APIcalls.begin(); record != APIcalls.end(); record++) {
+        auto key = record->getFirst();
+        if (!isLLVMIntrinsicFn(key)) {
+          res[fnname]["AP"][std::string(key)] = APIcalls[key];
+          // outs() << key << " " << APIcalls[key] << "\n";
+        }
+      }
+      // outs() << "\n";
+
+      // "h AP": call count per heap API class ("malloc", "free", "new", "delete")
+      for (auto record = heapAPIs.begin(); record != heapAPIs.end(); record++) {
+        auto key = record->getFirst();
+        res[fnname]["h AP"][std::string(key)] = heapAPIs[key];
+        // outs() << key << " " << heapAPIs[key] << "\n";
+      }
+      // "m AP": call count per function accepted by isMemorySensitiveFn
+
+      for (auto record = memoryAPIs.begin(); record != memoryAPIs.end();
+           record++) {
+        auto key = record->getFirst();
+        res[fnname]["m AP"][std::string(key)] = memoryAPIs[key];
+        // outs() << key << " " << memoryAPIs[key] << "\n";
+      }
+      // "ne lv": instruction count per debug-scope nesting level
+      for (auto record = nestedLevel.begin(); record != nestedLevel.end();
+           record++) {
+        auto key = record->first;
+        res[fnname]["ne lv"][std::to_string(key)] = nestedLevel[key];
+        // outs() << key << " " << nestedLevel[key] << "\n";
+      }
+      // "cm gl": compare count per number of constant operands
+      for (auto record = cmpGlobals.begin(); record != cmpGlobals.end();
+           record++) {
+        auto key = record->first;
+        res[fnname]["cm gl"][std::to_string(key)] = cmpGlobals[key];
+        // outs() << key << " " << cmpGlobals[key] << "\n";
+      }
+      // "cm nz": compare count per number of non-zero constant operands
+      for (auto record = cmpNonZeros.begin(); record != cmpNonZeros.end();
+           record++) {
+        auto key = record->first;
+        res[fnname]["cm nz"][std::to_string(key)] = cmpNonZeros[key];
+        // outs() << key << " " << cmpNonZeros[key] << "\n";
+      }
+
+      // "wr st": count of stores through a GEP per struct type name
+      for (auto record = structWrites.begin(); record != structWrites.end();
+           record++) {
+        auto key = record->getFirst();
+        // Some are nameless struct
+        res[fnname]["wr st"][std::string(key)] = structWrites[key];
+        // outs() << key << " " << structWrites[key] << "\n";
+      }
+      // outs() << "\n";
+
+      // "str arg": count per callee parameter type that is a struct after peeling pointers
+      for (auto record = structArgs.begin(); record != structArgs.end();
+           record++) {
+        auto key = record->first;
+        res[fnname]["str arg"][std::string(key)] = record->second;
+        // outs() << key << " " << record->second << "\n";
+      }
+      // outs() << "\n";
+
+      // "cm ty": compare count per type of the first operand
+      for (auto record = cmpTypes.begin(); record != cmpTypes.end(); record++) {
+        res[fnname]["cm ty"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+      // "cm cm": count per comparison class ("int cmp", "float cmp", "mem cmp", "str cmp")
+
+      for (auto record = cmpComplexity.begin(); record != cmpComplexity.end();
+           record++) {
+        res[fnname]["cm cm"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+
+      // "ar ty": count per callee parameter type
+      for (auto record = callArgTypes.begin(); record != callArgTypes.end();
+           record++) {
+        res[fnname]["ar ty"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+      // outs() << "\n";
+
+      // "st ty": store count per stored value type
+      for (auto record = storeTypes.begin(); record != storeTypes.end();
+           record++) {
+        res[fnname]["st ty"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+      // outs() << "\n";
+
+      // "l ty": load count per loaded type
+      for (auto record = loadTypes.begin(); record != loadTypes.end();
+           record++) {
+        res[fnname]["l ty"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+      // outs() << "\n";
+
+      // "al ty": alloca count per allocated type
+      for (auto record = allocaTypes.begin(); record != allocaTypes.end();
+           record++) {
+        res[fnname]["al ty"][record->first] = record->second;
+        // outs() << record->first << " " << record->second << "\n";
+      }
+      // NOTE: still inside the per-function loop, so the .json file is rewritten after every function with all of res so far.
+
+      if (getenv("ANALYSIS_OUTPUT_PATH")) {
+        if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
+                          genericFilePath + ".json")
+            << res << "\n") {
+        } else {
+          abort();
+        }
+      } else {
+        errs() << "output path not set!"
+               << "\n";
+      }
+    }
+
+    nlohmann::json struct_links;
+    // Per struct: "lks" = linked struct names, "desc" = element type-ID histogram; both keyed under the module name.
+    for (auto record = structLinks.begin(); record != structLinks.end();
+         record++) {
+      StringRef key = record->getFirst();
+      // outs() << "struct: " << key << "\t";
+      std::vector links{};
+      // outs() << "links: ";
+      for (auto item = structLinks[key].begin(); item != structLinks[key].end();
+           item++) {
+        links.push_back(std::string(*item));
+        // outs() << *item << " ";
+      }
+      struct_links[moduleName][std::string(key)]["lks"] = links;
+      // outs() << "\n";
+    }
+
+    for (auto record = structDesc.begin(); record != structDesc.end();
+         record++) {
+      auto key = record->getFirst();
+      struct_links[moduleName][std::string(key)]["desc"] = record->second;
+    }
+
+    // Write the struct information to <path>.lks; abort() if the stream write fails.
+
+    if (getenv("ANALYSIS_OUTPUT_PATH")) {
+      if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
+                        genericFilePath + ".lks")
+          << struct_links << "\n") {
+      } else {
+        abort();
+      }
+    } else {
+      errs() << "output path not set!"
+             << "\n";
+    }
+
+    nlohmann::json cfg;
+    // "edges": per function, successor block indices stored at the block's own index.
+    for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end();
+         record++) {
+      auto        current_bb = record->getFirst();
+      auto        loc = record->getSecond();
+      Function   *calling_func = current_bb->getParent();
+      std::string func_name = std::string("");
+
+      if (calling_func) {
+        func_name = std::string(calling_func->getName());
+        // outs() << "Function name: " << calling_func->getName() << "\n";
+      }
+
+      std::vector outgoing;
+      for (auto bb_successor = succ_begin(current_bb);
+           bb_successor != succ_end(current_bb); bb_successor++) {
+        outgoing.push_back(bb_to_cur_loc[*bb_successor]);
+      }
+      cfg["edges"][func_name][loc] = outgoing;
+    }
+    // "calls": per function, names called from each block, keyed by the block index as a string.
+    for (auto record = calls_in_bb.begin(); record != calls_in_bb.end();
+         record++) {
+      auto        current_bb = record->getFirst();
+      auto        loc = bb_to_cur_loc[current_bb];
+      Function   *calling_func = current_bb->getParent();
+      std::string func_name = std::string("");
+
+      if (calling_func) {
+        func_name = std::string(calling_func->getName());
+        // outs() << "Function name: " << calling_func->getName() << "\n";
+      }
+
+      std::vector outgoing_funcs;
+      for (auto &item : record->getSecond()) {
+        outgoing_funcs.push_back(std::string(item));
+      }
+      if (!outgoing_funcs.empty()) {
+        cfg["calls"][func_name][std::to_string(loc)] = outgoing_funcs;
+      }
+    }
+    // "entries": function name -> index of its entry block.
+    for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) {
+      cfg["entries"][std::string(record->getFirst())] =
+          bb_to_cur_loc[record->getSecond()];
+    }
+
+    if (getenv("ANALYSIS_OUTPUT_PATH")) {
+      if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
+                        genericFilePath + ".cfg")
+          << cfg << "\n") {
+      } else {
+        abort();
+      }
+
+    } else {
+      errs() << "output path not set!"
+             << "\n";
+    }
+  }
+
+#if USE_NEW_PM
+  auto PA = PreservedAnalyses::all();
+  return PA;
+#else
+  return false;  // the module is only inspected, never modified
+#endif
+}
diff --git a/scripts/fmt_all.sh b/scripts/fmt_all.sh index 545a65dc44..18f2188d27 100755 --- a/scripts/fmt_all.sh +++ b/scripts/fmt_all.sh @@ -11,7 +11,7 @@ cargo +nightly fmt echo "[*] Formatting C(pp) files" # shellcheck disable=SC2046 -clang-format-18 -i --style=file $(find . -type f \( -name '*.cpp' -o -iname '*.hpp' -o -name '*.cc' -o -name '*.cxx' -o -name '*.cc' -o -name '*.c' -o -name '*.h' \) | grep -v '/target/' | grep -v 'libpng-1\.6\.37' | grep -v 'stb_image\.h' | grep -v 'dlmalloc\.c') +clang-format-18 -i --style=file $(find . -type f \( -name '*.cpp' -o -iname '*.hpp' -o -name '*.cc' -o -name '*.cxx' -o -name '*.cc' -o -name '*.c' -o -name '*.h' \) | grep -v '/target/' | grep -v 'libpng-1\.6\.37' | grep -v 'stb_image\.h' | grep -v 'dlmalloc\.c' | grep -v 'QEMU-Nyx') fuzzers=$(find ./fuzzers -maxdepth 1 -type d) backtrace_fuzzers=$(find ./fuzzers/backtrace_baby_fuzzers -maxdepth 1 -type d)