From 9d933b871345d334f646af841066952d5b691de9 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Wed, 9 Aug 2023 09:31:47 -0700 Subject: [PATCH 01/10] Use UID for codeblocks --- data_specifications/specification.proto | 6 +++- include/anvill/Declarations.h | 3 +- lib/Declarations.cpp | 14 ++++----- lib/Lifters/BasicBlockLifter.cpp | 13 +++++---- lib/Lifters/FunctionLifter.cpp | 39 ++++++++++++++++--------- lib/Lifters/FunctionLifter.h | 6 ++-- lib/Protobuf.cpp | 21 ++++++------- tests/anvill_passes/CMakeLists.txt | 2 +- 8 files changed, 62 insertions(+), 42 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 9f7a6d963..1ac91adb4 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -271,10 +271,13 @@ message BlockContext { message CodeBlock { uint64 address = 1; string name = 2; + // Incoming block(s) by uid repeated uint64 incoming_blocks = 3; + // Outgoing block(s) by uid repeated uint64 outgoing_blocks = 4; uint32 size = 5; map context_assignments = 6; + uint64 uid = 7; } message Variables { @@ -309,10 +312,11 @@ message Function { uint64 entry_address = 1; FunctionLinkage func_linkage = 3; Callable callable = 4; + // Mapping of unique ID to codeblock map blocks = 5; map local_variables = 6; - // Keys are addresses of code blocks, each block + // Keys are unique IDs of code blocks, each block // may have a corresponding context map block_context = 7; StackEffects stack_effects = 8; diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index a6bdfe713..16af69daa 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -54,6 +54,7 @@ struct CodeBlock { // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a block. std::unordered_map context_assignments; + uint64_t uid; }; @@ -443,7 +444,7 @@ struct FunctionDecl : public CallableDecl { static Result Create(llvm::Function &func, const remill::Arch *arch); - SpecBlockContext GetBlockContext(std::uint64_t addr) const; + SpecBlockContext GetBlockContext(std::uint64_t uid) const; void AddBBContexts(std::unordered_map &contexts) const; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index c66dd6e71..958b63bed 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -488,16 +488,16 @@ size_t FunctionDecl::GetPointerDisplacement() const { return this->parameter_size + this->parameter_offset; } -SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { +SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t uid) const { return SpecBlockContext( - *this, GetWithDef(addr, this->stack_offsets_at_entry, SpecStackOffsets()), - GetWithDef(addr, this->stack_offsets_at_exit, SpecStackOffsets()), - GetWithDef(addr, this->constant_values_at_entry, + *this, GetWithDef(uid, this->stack_offsets_at_entry, SpecStackOffsets()), + GetWithDef(uid, this->stack_offsets_at_exit, SpecStackOffsets()), + GetWithDef(uid, this->constant_values_at_entry, std::vector()), - GetWithDef(addr, this->constant_values_at_exit, + GetWithDef(uid, this->constant_values_at_exit, std::vector()), - GetWithDef(addr, this->live_regs_at_entry, std::vector()), - GetWithDef(addr, this->live_regs_at_exit, std::vector())); + GetWithDef(uid, this->live_regs_at_entry, std::vector()), + GetWithDef(uid, this->live_regs_at_exit, std::vector())); } std::optional diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index d690a175a..be0a66f0e 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -365,7 +365,7 @@ llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + - std::to_string(this->block_def.addr); + std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = llvm::dyn_cast(remill::RecontextualizeType( @@ -586,14 +586,12 @@ void BasicBlockLifter::TerminateBasicBlockFunction( auto pc = ir.CreateLoad(address_type, bbfunc.next_pc_out); auto sw = ir.CreateSwitch(pc, this->invalid_successor_block); - for (auto e : this->block_def.outgoing_edges) { - auto succ_const = llvm::ConstantInt::get( - llvm::cast(this->address_type), e); - + for (auto edge_uid : this->block_def.outgoing_edges) { auto calling_bb = llvm::BasicBlock::Create(next_mem->getContext(), "", bbfunc.func); llvm::IRBuilder<> calling_bb_builder(calling_bb); - auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(e); + auto edge_bb = this->decl.cfg.at(edge_uid); + auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(edge_bb.uid); auto retval = child_lifter.ControlFlowCallBasicBlockFunction( caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem); if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { @@ -601,6 +599,9 @@ void BasicBlockLifter::TerminateBasicBlockFunction( } else { calling_bb_builder.CreateRet(retval); } + + auto succ_const = llvm::ConstantInt::get( + llvm::cast(this->address_type), edge_bb.addr); sw->addCase(succ_const, calling_bb); } diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 6d74702ee..e99e60687 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -335,22 +336,26 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } -BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t addr) { - std::pair key{curr_decl->address, addr}; +static uint64_t GetRandUid() { + static std::random_device rd; + static std::mt19937_64 engine(rd()); + static std::uniform_int_distribution dist(0, UINT64_MAX); + return dist(engine); +} + +BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t uid) { + std::pair key{curr_decl->address, uid}; auto lifter = this->bb_lifters.find(key); if (lifter != this->bb_lifters.end()) { return lifter->second; } std::unique_ptr context = std::make_unique( - this->curr_decl->GetBlockContext(addr)); + this->curr_decl->GetBlockContext(uid)); - CodeBlock defblk = {addr, 0, std::unordered_set(), - std::unordered_map()}; - auto maybe_blk = this->curr_decl->cfg.find(addr); - if (maybe_blk != this->curr_decl->cfg.end()) { - defblk = maybe_blk->second; - } + auto &cfg = this->curr_decl->cfg; + CHECK(cfg.contains(uid)); + CodeBlock defblk = cfg.find(uid)->second; auto inserted = this->bb_lifters.emplace( key, @@ -362,7 +367,7 @@ BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t addr) { const BasicBlockLifter & FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) { - auto &lifter = this->GetOrCreateBasicBlockLifter(blk.addr); + auto &lifter = this->GetOrCreateBasicBlockLifter(blk.uid); lifter.LiftBasicBlockFunction(); return lifter; } @@ -497,7 +502,11 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? - auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->func_address); + auto &cfg = this->curr_decl->cfg; + auto blk = std::find_if(std::begin(cfg), std::end(cfg), + [this](auto&& p) { return p.second.addr == this->func_address; }); + CHECK(blk != cfg.end()); + auto &entry_lifter = this->GetOrCreateBasicBlockLifter(blk->second.uid); auto call_inst = entry_lifter.CallBasicBlockFunction( ir, lifted_func_st.state_ptr, abstract_stack, this->mem_ptr_ref); @@ -700,8 +709,10 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, std::string prefix = "func" + std::to_string(decl.address); if (!func->isDeclaration()) { - for (auto &[block_addr, block] : decl.cfg) { - std::string name = prefix + "basic_block" + std::to_string(block_addr); + for (auto &[block_uid, block] : decl.cfg) { + CHECK(block_uid == block.uid); + std::string name = prefix + "basic_block" + std::to_string(block.addr) + "_" + std::to_string(block.uid); + auto new_version = target_module->getFunction(name); auto old_version = semantics_module->getFunction(name); if (!new_version) { @@ -714,7 +725,7 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, remill::CloneFunctionInto(old_version, new_version); new_version->setMetadata( kBasicBlockMetadata, - this->GetAddrAnnotation(block_addr, module_context)); + this->GetAddrAnnotation(block.addr, module_context)); CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); } } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 2fe5ebc5e..e8c0a7924 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -104,7 +104,9 @@ class FunctionLifter : public CodeLifter { const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; - BasicBlockLifter &GetOrCreateBasicBlockLifter(uint64_t addr); + // Get or create a basic block lifter for the basic block with specified + // uid. If a lifter for the uid does not exist, this function will create it + BasicBlockLifter &GetOrCreateBasicBlockLifter(uint64_t uid); const BasicBlockLifter &LiftBasicBlockFunction(const CodeBlock &); @@ -180,7 +182,7 @@ class FunctionLifter : public CodeLifter { // Maps program counters to lifted functions. std::unordered_map addr_to_func; - // maps a bbaddr to the lifter for that block + // maps a uid to the lifter for that block std::unordered_map, BasicBlockLifter> bb_lifters; diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 5684a52b0..bca36fd88 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -531,7 +531,7 @@ Result ProtobufTranslator::DecodeFunction( if (!function.has_frame()) { return std::string("All functions should have a frame"); } - auto frame = function.frame(); + const auto& frame = function.frame(); decl.stack_depth = frame.frame_size(); decl.ret_ptr_offset = frame.return_address_offset(); @@ -608,7 +608,7 @@ void ProtobufTranslator::AddLiveValuesToBB( void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { - for (auto blk : obj.blocks()) { + for (const auto& blk : obj.blocks()) { CodeBlock nblk = { blk.second.address(), blk.second.size(), @@ -616,16 +616,17 @@ void ProtobufTranslator::ParseCFGIntoFunction( blk.second.outgoing_blocks().end()}, {blk.second.context_assignments().begin(), blk.second.context_assignments().end()}, + blk.first, }; decl.cfg.emplace(blk.first, std::move(nblk)); } - for (auto &[blk_addr, ctx] : obj.block_context()) { + for (auto &[blk_uid, ctx] : obj.block_context()) { std::vector stack_offsets_at_entry, stack_offsets_at_exit; std::vector constant_values_at_entry, constant_values_at_exit; - auto blk = decl.cfg[blk_addr]; + auto blk = decl.cfg[blk_uid]; auto symval_to_domains = [&](const specification::ValueMapping &symval, std::vector &stack_offsets, std::vector &constant_values) { @@ -680,20 +681,20 @@ void ProtobufTranslator::ParseCFGIntoFunction( for (auto &symval : ctx.symvals_at_entry()) { symval_to_domains(symval, - decl.stack_offsets_at_entry[blk_addr].affine_equalities, - decl.constant_values_at_entry[blk_addr]); + decl.stack_offsets_at_entry[blk_uid].affine_equalities, + decl.constant_values_at_entry[blk_uid]); } for (auto &symval : ctx.symvals_at_exit()) { symval_to_domains(symval, - decl.stack_offsets_at_exit[blk_addr].affine_equalities, - decl.constant_values_at_exit[blk_addr]); + decl.stack_offsets_at_exit[blk_uid].affine_equalities, + decl.constant_values_at_exit[blk_uid]); } - this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_addr, + this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_uid, ctx.live_at_entries()); - this->AddLiveValuesToBB(decl.live_regs_at_exit, blk_addr, + this->AddLiveValuesToBB(decl.live_regs_at_exit, blk_uid, ctx.live_at_exits()); } } diff --git a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index 8c4c78b25..ff406b2d3 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -42,6 +42,6 @@ target_include_directories(test_anvill_passes PRIVATE add_test( NAME test_anvill_passes - COMMAND "$" + COMMAND test_anvill_passes WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" ) From 6daea1997ce2d1d2a132fd5d6951ebb43bd2587a Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Wed, 27 Sep 2023 16:28:56 -0400 Subject: [PATCH 02/10] Fix BasicBlockContext lookups Fixes the missing literal_struct_2 issue --- include/anvill/ABI.h | 3 ++- include/anvill/Passes/BasicBlockPass.h | 8 ++++---- include/anvill/Specification.h | 2 +- include/anvill/Utils.h | 1 + lib/ABI.cpp | 3 ++- lib/Declarations.cpp | 10 +++++----- lib/Lifters/BasicBlockLifter.cpp | 11 ++++++++--- lib/Lifters/BasicBlockLifter.h | 3 ++- lib/Lifters/CodeLifter.cpp | 10 ++++++++++ lib/Lifters/CodeLifter.h | 3 +++ lib/Lifters/FunctionLifter.cpp | 11 +++++++---- lib/Passes/ReplaceStackReferences.cpp | 3 ++- lib/Specification.cpp | 4 ++-- lib/Utils.cpp | 12 +++++++++++- 14 files changed, 60 insertions(+), 24 deletions(-) diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index 02cf9e302..6eb8748e9 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -86,7 +86,8 @@ extern const std::string kAnvillStackZero; // use this to queue off of then just move it after the split extern const std::string kStackMetadata; -extern const std::string kBasicBlockMetadata; +extern const std::string kBasicBlockAddrMetadata; +extern const std::string kBasicBlockUidMetadata; /// Intrinsic that acts like a return instruction but leaves both the basic block and the parent function. diff --git a/include/anvill/Passes/BasicBlockPass.h b/include/anvill/Passes/BasicBlockPass.h index 7ed23fc93..11837023c 100644 --- a/include/anvill/Passes/BasicBlockPass.h +++ b/include/anvill/Passes/BasicBlockPass.h @@ -15,7 +15,7 @@ namespace anvill { class BasicBlockContexts { public: virtual std::optional> - GetBasicBlockContextForAddr(uint64_t addr) const = 0; + GetBasicBlockContextForUid(uint64_t uid) const = 0; virtual const FunctionDecl &GetFunctionAtAddress(uint64_t addr) const = 0; }; @@ -33,9 +33,9 @@ class BasicBlockPass : public llvm::PassInfoMixin> { llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { auto &bb_pass = *static_cast(this); - auto bbaddr = anvill::GetBasicBlockAddr(&F); - if (bbaddr.has_value()) { - auto maybe_bb_cont = contexts.GetBasicBlockContextForAddr(*bbaddr); + auto bbuid = anvill::GetBasicBlockUid(&F); + if (bbuid.has_value()) { + auto maybe_bb_cont = contexts.GetBasicBlockContextForUid(*bbuid); if (maybe_bb_cont) { const BasicBlockContext &bb_cont = *maybe_bb_cont; auto &parent_func = diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index 18d7fed65..c637add50 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -98,7 +98,7 @@ class SpecBlockContexts : public BasicBlockContexts { SpecBlockContexts(const Specification &spec); virtual std::optional> - GetBasicBlockContextForAddr(uint64_t addr) const override; + GetBasicBlockContextForUid(uint64_t uid) const override; virtual const FunctionDecl & GetFunctionAtAddress(uint64_t addr) const override; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index ef27f3a85..c7c238643 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -135,6 +135,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, llvm::Value *state_ptr, llvm::Value *mem_ptr); std::optional GetBasicBlockAddr(llvm::Function *func); +std::optional GetBasicBlockUid(llvm::Function *func); llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); diff --git a/lib/ABI.cpp b/lib/ABI.cpp index 70b35e410..1e45c038e 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -82,7 +82,8 @@ const std::string kAnvillDataProvenanceFunc(kAnvillNamePrefix + // `alloca`. const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); -const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); +const std::string kBasicBlockAddrMetadata(kAnvillNamePrefix + "basic_block_addr_md"); +const std::string kBasicBlockUidMetadata(kAnvillNamePrefix + "basic_block_uid_md"); const std::string kStackMetadata(kAnvillNamePrefix + "stack_alloc"); diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 958b63bed..c626bb64e 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -78,8 +78,8 @@ VariableDecl::DeclareInModule(const std::string &name, void FunctionDecl::AddBBContexts( std::unordered_map &contexts) const { - for (const auto &[addr, _] : this->cfg) { - contexts.insert({addr, this->GetBlockContext(addr)}); + for (const auto &[uid, _] : this->cfg) { + contexts.insert({uid, this->GetBlockContext(uid)}); } } @@ -475,12 +475,12 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { namespace { template -V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { - if (map.find(addr) == map.end()) { +V GetWithDef(uint64_t uid, const std::unordered_map &map, V def) { + if (map.find(uid) == map.end()) { return def; } - return map.find(addr)->second; + return map.find(uid)->second; } } // namespace diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index cc3cd1c94..55a0189e4 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -402,9 +402,12 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { } -llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { +llvm::MDNode *BasicBlockLifter::GetBasicBlockAddrAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } +llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(uint64_t uid) const { + return this->GetUidAnnotation(uid, this->semantics_module->getContext()); +} llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + @@ -437,8 +440,10 @@ llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto func = bb_func; - func->setMetadata(anvill::kBasicBlockMetadata, - GetBasicBlockAnnotation(this->block_def.addr)); + func->setMetadata(anvill::kBasicBlockAddrMetadata, + GetBasicBlockAddrAnnotation(this->block_def.addr)); + func->setMetadata(anvill::kBasicBlockUidMetadata, + GetBasicBlockUidAnnotation(this->block_def.uid)); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index e6c065a97..7a6376936 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -107,7 +107,8 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext context); - llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; + llvm::MDNode *GetBasicBlockAddrAnnotation(uint64_t addr) const; + llvm::MDNode *GetBasicBlockUidAnnotation(uint64_t uid) const; public: BasicBlockLifter(std::unique_ptr block_context, diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 2b8e921d8..dac866c7f 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -57,6 +57,8 @@ CodeLifter::CodeLifter(const LifterOptions &options, type_specifier(type_specifier), address_type( llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), + uid_type( + llvm::Type::getInt64Ty(llvm_context)), i8_type(llvm::Type::getInt8Ty(llvm_context)), i8_zero(llvm::Constant::getNullValue(i8_type)), i32_type(llvm::Type::getInt32Ty(llvm_context)), @@ -191,6 +193,14 @@ llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, return llvm::MDNode::get(context, pc_md); } +llvm::MDNode *CodeLifter::GetUidAnnotation(uint64_t uid, + llvm::LLVMContext &context) const { + auto uid_val = llvm::ConstantInt::get( + remill::RecontextualizeType(uid_type, context), uid); + auto uid_md = llvm::ValueAsMetadata::get(uid_val); + return llvm::MDNode::get(context, uid_md); +} + // Allocate and initialize the state structure. llvm::Value * CodeLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 195815eeb..8b03deaf5 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -45,6 +45,7 @@ class CodeLifter { const TypeProvider &type_provider; const TypeTranslator &type_specifier; llvm::IntegerType *const address_type; + llvm::IntegerType *const uid_type; // Convenient to keep around. @@ -78,6 +79,8 @@ class CodeLifter { llvm::MDNode *GetAddrAnnotation(uint64_t addr, llvm::LLVMContext &context) const; + llvm::MDNode *GetUidAnnotation(uint64_t uid, + llvm::LLVMContext &context) const; public: CodeLifter(const LifterOptions &options, llvm::Module *semantics_module, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index e99e60687..daa508067 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -354,7 +354,6 @@ BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t uid) { this->curr_decl->GetBlockContext(uid)); auto &cfg = this->curr_decl->cfg; - CHECK(cfg.contains(uid)); CodeBlock defblk = cfg.find(uid)->second; auto inserted = this->bb_lifters.emplace( @@ -384,8 +383,8 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, << ": " << this->curr_decl->cfg.size(); - for (const auto &[addr, blk] : this->curr_decl->cfg) { - DLOG(INFO) << "Visiting: " << std::hex << addr; + for (const auto &[uid, blk] : this->curr_decl->cfg) { + DLOG(INFO) << "Visiting: " << std::hex << blk.addr << " " << std::dec << uid; this->VisitBlock(blk, lifted_function_state, abstract_stack); } } @@ -724,9 +723,13 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, } remill::CloneFunctionInto(old_version, new_version); new_version->setMetadata( - kBasicBlockMetadata, + kBasicBlockAddrMetadata, this->GetAddrAnnotation(block.addr, module_context)); + new_version->setMetadata( + kBasicBlockUidMetadata, + this->GetUidAnnotation(block.uid, module_context)); CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); + CHECK(anvill::GetBasicBlockUid(new_version).has_value()); } } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 3d9bfbfa7..7d3f5270b 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -255,7 +255,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); DLOG(INFO) << "Replacing stack vars in bb: " << std::hex - << *anvill::GetBasicBlockAddr(&F); + << *anvill::GetBasicBlockAddr(&F) << " " << std::dec + << *anvill::GetBasicBlockUid(&F); DLOG(INFO) << "Stack size " << cont.GetStackSize(); DLOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); AbstractStack stk( diff --git a/lib/Specification.cpp b/lib/Specification.cpp index dbf8fc5c1..2ecbc4836 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -447,8 +447,8 @@ SpecBlockContexts::SpecBlockContexts(const Specification &spec) { } std::optional> -SpecBlockContexts::GetBasicBlockContextForAddr(uint64_t addr) const { - auto cont = this->contexts.find(addr); +SpecBlockContexts::GetBasicBlockContextForUid(uint64_t uid) const { + auto cont = this->contexts.find(uid); if (cont == this->contexts.end()) { return std::nullopt; } diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 5f33dced1..211564cc9 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -970,7 +970,17 @@ bool CanBeAliased(llvm::Value *val) { } std::optional GetBasicBlockAddr(llvm::Function *func) { - auto meta = func->getMetadata(kBasicBlockMetadata); + auto meta = func->getMetadata(kBasicBlockAddrMetadata); + if (!meta) { + return std::nullopt; + } + + auto v = llvm::cast(meta->getOperand(0))->getValue(); + + return llvm::cast(v)->getLimitedValue(); +} +std::optional GetBasicBlockUid(llvm::Function *func) { + auto meta = func->getMetadata(kBasicBlockUidMetadata); if (!meta) { return std::nullopt; } From 4cfe2d955b1b70f2e3d0fd984cfd1e43b23a19a3 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Thu, 28 Sep 2023 11:55:15 -0400 Subject: [PATCH 03/10] Test CI --- .github/workflows/build.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7c02b969a..373326c98 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,15 +14,6 @@ on: # - cron: "0 */6 * * *" push: - branches: - - "*" - - tags: - - "*" - - pull_request: - branches: - - "*" jobs: cleanup_stale_workflows: From b73850884dda447ec54950864b0741bae1420c53 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Fri, 29 Sep 2023 10:02:19 -0400 Subject: [PATCH 04/10] Strong Uid typing --- include/anvill/Declarations.h | 38 ++++++++++++++++++-------- include/anvill/Passes/BasicBlockPass.h | 2 +- include/anvill/Specification.h | 4 +-- include/anvill/Utils.h | 2 +- lib/Declarations.cpp | 6 ++-- lib/Lifters/BasicBlockLifter.cpp | 4 +-- lib/Lifters/BasicBlockLifter.h | 2 +- lib/Lifters/CodeLifter.cpp | 6 ++-- lib/Lifters/CodeLifter.h | 3 +- lib/Lifters/FunctionLifter.cpp | 12 ++++---- lib/Lifters/FunctionLifter.h | 2 +- lib/Passes/ReplaceStackReferences.cpp | 2 +- lib/Protobuf.cpp | 20 ++++++++------ lib/Protobuf.h | 4 +-- lib/Specification.cpp | 2 +- lib/Utils.cpp | 4 +-- 16 files changed, 68 insertions(+), 45 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 2748c6e21..6aa22a40c 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -45,16 +46,31 @@ struct Register; } // namespace remill namespace anvill { +struct Uid { + std::uint64_t value; + bool operator==(const Uid &) const = default; +}; + +} + +template <> +struct std::hash { + size_t operator()(const anvill::Uid &uid) const noexcept { + return std::hash()(uid.value); + } +}; + +namespace anvill { struct CodeBlock { uint64_t addr; uint32_t size; - std::unordered_set outgoing_edges; + std::unordered_set outgoing_edges; // The set of context assignments that occur at the entry point to this block. // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a block. std::unordered_map context_assignments; - uint64_t uid; + Uid uid; }; @@ -401,24 +417,24 @@ struct FunctionDecl : public CallableDecl { bool is_extern{false}; // These are the blocks contained within the function representing the CFG. - std::unordered_map cfg; + std::unordered_map cfg; std::unordered_map locals; - std::unordered_map stack_offsets_at_entry; + std::unordered_map stack_offsets_at_entry; - std::unordered_map stack_offsets_at_exit; + std::unordered_map stack_offsets_at_exit; - std::unordered_map> + std::unordered_map> live_regs_at_entry; - std::unordered_map> + std::unordered_map> live_regs_at_exit; - std::unordered_map> + std::unordered_map> constant_values_at_entry; - std::unordered_map> + std::unordered_map> constant_values_at_exit; // sorted vector of hints @@ -452,10 +468,10 @@ struct FunctionDecl : public CallableDecl { static Result Create(llvm::Function &func, const remill::Arch *arch); - SpecBlockContext GetBlockContext(std::uint64_t uid) const; + SpecBlockContext GetBlockContext(Uid uid) const; void - AddBBContexts(std::unordered_map &contexts) const; + AddBBContexts(std::unordered_map &contexts) const; }; // A call site decl, as represented at a "near ABI" level. This is like a diff --git a/include/anvill/Passes/BasicBlockPass.h b/include/anvill/Passes/BasicBlockPass.h index 11837023c..e3f5aeee0 100644 --- a/include/anvill/Passes/BasicBlockPass.h +++ b/include/anvill/Passes/BasicBlockPass.h @@ -15,7 +15,7 @@ namespace anvill { class BasicBlockContexts { public: virtual std::optional> - GetBasicBlockContextForUid(uint64_t uid) const = 0; + GetBasicBlockContextForUid(Uid uid) const = 0; virtual const FunctionDecl &GetFunctionAtAddress(uint64_t addr) const = 0; }; diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index c637add50..00ea1f0e5 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -91,14 +91,14 @@ struct ValueDecl; class Specification; class SpecBlockContexts : public BasicBlockContexts { - std::unordered_map contexts; + std::unordered_map contexts; std::unordered_map> funcs; public: SpecBlockContexts(const Specification &spec); virtual std::optional> - GetBasicBlockContextForUid(uint64_t uid) const override; + GetBasicBlockContextForUid(Uid uid) const override; virtual const FunctionDecl & GetFunctionAtAddress(uint64_t addr) const override; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index c7c238643..85ec298b7 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -135,7 +135,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, llvm::Value *state_ptr, llvm::Value *mem_ptr); std::optional GetBasicBlockAddr(llvm::Function *func); -std::optional GetBasicBlockUid(llvm::Function *func); +std::optional GetBasicBlockUid(llvm::Function *func); llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index c626bb64e..46800c75b 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -77,7 +77,7 @@ VariableDecl::DeclareInModule(const std::string &name, } void FunctionDecl::AddBBContexts( - std::unordered_map &contexts) const { + std::unordered_map &contexts) const { for (const auto &[uid, _] : this->cfg) { contexts.insert({uid, this->GetBlockContext(uid)}); } @@ -475,7 +475,7 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { namespace { template -V GetWithDef(uint64_t uid, const std::unordered_map &map, V def) { +V GetWithDef(Uid uid, const std::unordered_map &map, V def) { if (map.find(uid) == map.end()) { return def; } @@ -488,7 +488,7 @@ size_t FunctionDecl::GetPointerDisplacement() const { return this->parameter_size + this->parameter_offset; } -SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t uid) const { +SpecBlockContext FunctionDecl::GetBlockContext(Uid uid) const { return SpecBlockContext( *this, GetWithDef(uid, this->stack_offsets_at_entry, SpecStackOffsets()), GetWithDef(uid, this->stack_offsets_at_exit, SpecStackOffsets()), diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 55a0189e4..85071dcea 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -405,13 +405,13 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { llvm::MDNode *BasicBlockLifter::GetBasicBlockAddrAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } -llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(uint64_t uid) const { +llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(Uid uid) const { return this->GetUidAnnotation(uid, this->semantics_module->getContext()); } llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + - std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid); + std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid.value); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = llvm::dyn_cast(remill::RecontextualizeType( diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 7a6376936..89021c20b 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -108,7 +108,7 @@ class BasicBlockLifter : public CodeLifter { llvm::MDNode *GetBasicBlockAddrAnnotation(uint64_t addr) const; - llvm::MDNode *GetBasicBlockUidAnnotation(uint64_t uid) const; + llvm::MDNode *GetBasicBlockUidAnnotation(Uid uid) const; public: BasicBlockLifter(std::unique_ptr block_context, diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index dac866c7f..f853c83c1 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -20,6 +20,8 @@ #include +#include "anvill/Declarations.h" + namespace anvill { namespace { // Clear out LLVM variable names. They're usually not helpful. @@ -193,10 +195,10 @@ llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, return llvm::MDNode::get(context, pc_md); } -llvm::MDNode *CodeLifter::GetUidAnnotation(uint64_t uid, +llvm::MDNode *CodeLifter::GetUidAnnotation(Uid uid, llvm::LLVMContext &context) const { auto uid_val = llvm::ConstantInt::get( - remill::RecontextualizeType(uid_type, context), uid); + remill::RecontextualizeType(uid_type, context), uid.value); auto uid_md = llvm::ValueAsMetadata::get(uid_val); return llvm::MDNode::get(context, uid_md); } diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 8b03deaf5..2df69cf6f 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -7,6 +7,7 @@ #include #include +#include "anvill/Declarations.h" #include "anvill/Lifters.h" namespace anvill { @@ -79,7 +80,7 @@ class CodeLifter { llvm::MDNode *GetAddrAnnotation(uint64_t addr, llvm::LLVMContext &context) const; - llvm::MDNode *GetUidAnnotation(uint64_t uid, + llvm::MDNode *GetUidAnnotation(Uid uid, llvm::LLVMContext &context) const; public: diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index daa508067..74b128eb2 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -336,15 +336,15 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } -static uint64_t GetRandUid() { +static Uid GetRandUid() { static std::random_device rd; static std::mt19937_64 engine(rd()); static std::uniform_int_distribution dist(0, UINT64_MAX); - return dist(engine); + return {dist(engine)}; } -BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t uid) { - std::pair key{curr_decl->address, uid}; +BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(Uid uid) { + std::pair key{curr_decl->address, uid.value}; auto lifter = this->bb_lifters.find(key); if (lifter != this->bb_lifters.end()) { return lifter->second; @@ -384,7 +384,7 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, for (const auto &[uid, blk] : this->curr_decl->cfg) { - DLOG(INFO) << "Visiting: " << std::hex << blk.addr << " " << std::dec << uid; + DLOG(INFO) << "Visiting: " << std::hex << blk.addr << " " << std::dec << uid.value; this->VisitBlock(blk, lifted_function_state, abstract_stack); } } @@ -710,7 +710,7 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, if (!func->isDeclaration()) { for (auto &[block_uid, block] : decl.cfg) { CHECK(block_uid == block.uid); - std::string name = prefix + "basic_block" + std::to_string(block.addr) + "_" + std::to_string(block.uid); + std::string name = prefix + "basic_block" + std::to_string(block.addr) + "_" + std::to_string(block.uid.value); auto new_version = target_module->getFunction(name); auto old_version = semantics_module->getFunction(name); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index e8c0a7924..3ddee5139 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -106,7 +106,7 @@ class FunctionLifter : public CodeLifter { // Get or create a basic block lifter for the basic block with specified // uid. If a lifter for the uid does not exist, this function will create it - BasicBlockLifter &GetOrCreateBasicBlockLifter(uint64_t uid); + BasicBlockLifter &GetOrCreateBasicBlockLifter(Uid uid); const BasicBlockLifter &LiftBasicBlockFunction(const CodeBlock &); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 7d3f5270b..5ff5fbf60 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -256,7 +256,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( DLOG(INFO) << "Replacing stack vars in bb: " << std::hex << *anvill::GetBasicBlockAddr(&F) << " " << std::dec - << *anvill::GetBasicBlockUid(&F); + << (*anvill::GetBasicBlockUid(&F)).value; DLOG(INFO) << "Stack size " << cont.GetStackSize(); DLOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); AbstractStack stk( diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 29a5213f6..900cd34a7 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -611,11 +611,11 @@ Result ProtobufTranslator::DecodeFunction( } void ProtobufTranslator::AddLiveValuesToBB( - std::unordered_map> &map, - uint64_t bb_addr, + std::unordered_map> &map, + Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const { - auto &v = map.insert({bb_addr, std::vector()}).first->second; + auto &v = map.insert({bb_uid, std::vector()}).first->second; for (auto var : values) { auto param = DecodeParameter(var); @@ -630,23 +630,27 @@ void ProtobufTranslator::AddLiveValuesToBB( void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { for (const auto& blk : obj.blocks()) { + std::unordered_set tmp; + for (auto o : blk.second.outgoing_blocks()) { + tmp.insert({o}); + } CodeBlock nblk = { blk.second.address(), blk.second.size(), - {blk.second.outgoing_blocks().begin(), - blk.second.outgoing_blocks().end()}, + tmp, {blk.second.context_assignments().begin(), blk.second.context_assignments().end()}, - blk.first, + {blk.first}, }; - decl.cfg.emplace(blk.first, std::move(nblk)); + decl.cfg.emplace(Uid{blk.first}, std::move(nblk)); } - for (auto &[blk_uid, ctx] : obj.block_context()) { + for (auto &[blk_uid_, ctx] : obj.block_context()) { std::vector stack_offsets_at_entry, stack_offsets_at_exit; std::vector constant_values_at_entry, constant_values_at_exit; + Uid blk_uid = {blk_uid_}; auto blk = decl.cfg[blk_uid]; auto symval_to_domains = [&](const specification::ValueMapping &symval, std::vector &stack_offsets, diff --git a/lib/Protobuf.h b/lib/Protobuf.h index ef50ceb90..90bbbc565 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -90,8 +90,8 @@ class ProtobufTranslator { FunctionDecl &decl) const; void AddLiveValuesToBB( - std::unordered_map> &map, - uint64_t bb_addr, + std::unordered_map> &map, + Uid bb_addr, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const; diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 2ecbc4836..2270210e2 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -447,7 +447,7 @@ SpecBlockContexts::SpecBlockContexts(const Specification &spec) { } std::optional> -SpecBlockContexts::GetBasicBlockContextForUid(uint64_t uid) const { +SpecBlockContexts::GetBasicBlockContextForUid(Uid uid) const { auto cont = this->contexts.find(uid); if (cont == this->contexts.end()) { return std::nullopt; diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 211564cc9..486409914 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -979,7 +979,7 @@ std::optional GetBasicBlockAddr(llvm::Function *func) { return llvm::cast(v)->getLimitedValue(); } -std::optional GetBasicBlockUid(llvm::Function *func) { +std::optional GetBasicBlockUid(llvm::Function *func) { auto meta = func->getMetadata(kBasicBlockUidMetadata); if (!meta) { return std::nullopt; @@ -987,7 +987,7 @@ std::optional GetBasicBlockUid(llvm::Function *func) { auto v = llvm::cast(meta->getOperand(0))->getValue(); - return llvm::cast(v)->getLimitedValue(); + return Uid{llvm::cast(v)->getLimitedValue()}; } llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { From 9a97fa91198d95f3d1508b4d58362779e2a9f936 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Fri, 29 Sep 2023 16:23:23 -0400 Subject: [PATCH 05/10] Cleanup and support parsing Json spec --- lib/Lifters/BasicBlockLifter.cpp | 7 ++++--- lib/Lifters/FunctionLifter.cpp | 2 +- lib/Protobuf.h | 2 +- lib/Specification.cpp | 6 +++++- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 85071dcea..51adf8d07 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -638,8 +638,9 @@ void BasicBlockLifter::TerminateBasicBlockFunction( auto calling_bb = llvm::BasicBlock::Create(next_mem->getContext(), "", bbfunc.func); llvm::IRBuilder<> calling_bb_builder(calling_bb); - auto edge_bb = this->decl.cfg.at(edge_uid); - auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(edge_bb.uid); + auto edge_bb = this->decl.cfg.find(edge_uid); + CHECK(edge_bb != this->decl.cfg.end()); + auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(edge_bb->second.uid); auto retval = child_lifter.ControlFlowCallBasicBlockFunction( caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem); if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { @@ -649,7 +650,7 @@ void BasicBlockLifter::TerminateBasicBlockFunction( } auto succ_const = llvm::ConstantInt::get( - llvm::cast(this->address_type), edge_bb.addr); + llvm::cast(this->address_type), edge_bb->second.addr); sw->addCase(succ_const, calling_bb); } diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 74b128eb2..ab3fab6b5 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -502,7 +502,7 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? auto &cfg = this->curr_decl->cfg; - auto blk = std::find_if(std::begin(cfg), std::end(cfg), + auto blk = std::find_if(cfg.begin(), cfg.end(), [this](auto&& p) { return p.second.addr == this->func_address; }); CHECK(blk != cfg.end()); auto &entry_lifter = this->GetOrCreateBasicBlockLifter(blk->second.uid); diff --git a/lib/Protobuf.h b/lib/Protobuf.h index 90bbbc565..327152b65 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -91,7 +91,7 @@ class ProtobufTranslator { void AddLiveValuesToBB( std::unordered_map> &map, - Uid bb_addr, + Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const; diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 2270210e2..06a48f2e1 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -9,6 +9,7 @@ #include "Specification.h" #include +#include #include #include #include @@ -333,7 +334,10 @@ anvill::Result Specification::DecodeFromPB(llvm::LLVMContext &context, const std::string &pb) { ::specification::Specification spec; if (!spec.ParseFromString(pb)) { - return {"Failed to parse specification"}; + auto status = google::protobuf::util::JsonStringToMessage(pb, &spec); + if (!status.ok()) { + return {"Failed to parse specification"}; + } } auto arch{GetArch(context, spec)}; From e717d07df02dbd3fff9f73a232fa4714893be73c Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Fri, 29 Sep 2023 16:23:45 -0400 Subject: [PATCH 06/10] Revert "Test CI" This reverts commit 4cfe2d955b1b70f2e3d0fd984cfd1e43b23a19a3. --- .github/workflows/build.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 373326c98..7c02b969a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,6 +14,15 @@ on: # - cron: "0 */6 * * *" push: + branches: + - "*" + + tags: + - "*" + + pull_request: + branches: + - "*" jobs: cleanup_stale_workflows: From 20955211ca60cd45009cd74496ceb49d0f07b67e Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Mon, 2 Oct 2023 11:23:37 -0400 Subject: [PATCH 07/10] Remove basic block address metadata Look up address using UID --- include/anvill/ABI.h | 1 - include/anvill/Utils.h | 1 - lib/ABI.cpp | 1 - lib/Lifters/BasicBlockLifter.cpp | 2 -- lib/Lifters/CodeLifter.cpp | 3 +-- lib/Lifters/FunctionLifter.cpp | 4 ---- lib/Optimize.cpp | 2 +- lib/Passes/ReplaceStackReferences.cpp | 2 +- lib/Utils.cpp | 10 ---------- 9 files changed, 3 insertions(+), 23 deletions(-) diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index 6eb8748e9..ef836ff60 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -86,7 +86,6 @@ extern const std::string kAnvillStackZero; // use this to queue off of then just move it after the split extern const std::string kStackMetadata; -extern const std::string kBasicBlockAddrMetadata; extern const std::string kBasicBlockUidMetadata; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 85ec298b7..adba87d1e 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -134,7 +134,6 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); -std::optional GetBasicBlockAddr(llvm::Function *func); std::optional GetBasicBlockUid(llvm::Function *func); llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); diff --git a/lib/ABI.cpp b/lib/ABI.cpp index 1e45c038e..1df984d2e 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -82,7 +82,6 @@ const std::string kAnvillDataProvenanceFunc(kAnvillNamePrefix + // `alloca`. const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); -const std::string kBasicBlockAddrMetadata(kAnvillNamePrefix + "basic_block_addr_md"); const std::string kBasicBlockUidMetadata(kAnvillNamePrefix + "basic_block_uid_md"); const std::string kStackMetadata(kAnvillNamePrefix + "stack_alloc"); diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 51adf8d07..8262fa241 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -440,8 +440,6 @@ llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto func = bb_func; - func->setMetadata(anvill::kBasicBlockAddrMetadata, - GetBasicBlockAddrAnnotation(this->block_def.addr)); func->setMetadata(anvill::kBasicBlockUidMetadata, GetBasicBlockUidAnnotation(this->block_def.uid)); diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index f853c83c1..711e0da6f 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -59,8 +59,7 @@ CodeLifter::CodeLifter(const LifterOptions &options, type_specifier(type_specifier), address_type( llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), - uid_type( - llvm::Type::getInt64Ty(llvm_context)), + uid_type(llvm::Type::getInt64Ty(llvm_context)), i8_type(llvm::Type::getInt8Ty(llvm_context)), i8_zero(llvm::Constant::getNullValue(i8_type)), i32_type(llvm::Type::getInt32Ty(llvm_context)), diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index ab3fab6b5..698c96ab4 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -722,13 +722,9 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, type, llvm::GlobalValue::ExternalLinkage, name, target_module); } remill::CloneFunctionInto(old_version, new_version); - new_version->setMetadata( - kBasicBlockAddrMetadata, - this->GetAddrAnnotation(block.addr, module_context)); new_version->setMetadata( kBasicBlockUidMetadata, this->GetUidAnnotation(block.uid, module_context)); - CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); CHECK(anvill::GetBasicBlockUid(new_version).has_value()); } } diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index e3b1db9ca..6f704e43b 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -350,7 +350,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // lets make sure we eliminate all the basic block functions because we dont care anymore for (auto &f : module.getFunctionList()) { - if (anvill::GetBasicBlockAddr(&f)) { + if (anvill::GetBasicBlockUid(&f)) { f.setLinkage(llvm::GlobalValue::InternalLinkage); } } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 5ff5fbf60..55593962b 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -255,7 +255,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); DLOG(INFO) << "Replacing stack vars in bb: " << std::hex - << *anvill::GetBasicBlockAddr(&F) << " " << std::dec + << fdecl.address << " " << std::dec << (*anvill::GetBasicBlockUid(&F)).value; DLOG(INFO) << "Stack size " << cont.GetStackSize(); DLOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 486409914..f90b9b4bd 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -969,16 +969,6 @@ bool CanBeAliased(llvm::Value *val) { } } -std::optional GetBasicBlockAddr(llvm::Function *func) { - auto meta = func->getMetadata(kBasicBlockAddrMetadata); - if (!meta) { - return std::nullopt; - } - - auto v = llvm::cast(meta->getOperand(0))->getValue(); - - return llvm::cast(v)->getLimitedValue(); -} std::optional GetBasicBlockUid(llvm::Function *func) { auto meta = func->getMetadata(kBasicBlockUidMetadata); if (!meta) { From caa91f5c765696c1472f671982a5abc640e2ffc4 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Wed, 4 Oct 2023 14:29:02 -0400 Subject: [PATCH 08/10] Fix review comments and add entry_uid to function spec --- data_specifications/specification.proto | 3 ++- include/anvill/Declarations.h | 2 ++ lib/Lifters/BasicBlockLifter.cpp | 4 ---- lib/Lifters/BasicBlockLifter.h | 1 - lib/Lifters/FunctionLifter.cpp | 13 +------------ lib/Protobuf.cpp | 1 + 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 3b03578d2..dd95ba170 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -315,7 +315,8 @@ message TypeHint { message Function { uint64 entry_address = 1; - FunctionLinkage func_linkage = 3; + uint64 entry_uid = 12; + FunctionLinkage func_linkage = 3; Callable callable = 4; // Mapping of unique ID to codeblock map blocks = 5; diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 6aa22a40c..f2de10367 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -407,6 +407,8 @@ struct FunctionDecl : public CallableDecl { public: // Address of this function in memory. std::uint64_t address{0}; + // Entry block UID + Uid entry_uid{0}; // The maximum number of bytes of redzone afforded to this function // (if it doesn't change the stack pointer, or, for example, writes diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 8262fa241..282e1f6a2 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -401,10 +401,6 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { } } - -llvm::MDNode *BasicBlockLifter::GetBasicBlockAddrAnnotation(uint64_t addr) const { - return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); -} llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(Uid uid) const { return this->GetUidAnnotation(uid, this->semantics_module->getContext()); } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 89021c20b..4c574c798 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -107,7 +107,6 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext context); - llvm::MDNode *GetBasicBlockAddrAnnotation(uint64_t addr) const; llvm::MDNode *GetBasicBlockUidAnnotation(Uid uid) const; public: diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 698c96ab4..0c93c3385 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -336,13 +336,6 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } -static Uid GetRandUid() { - static std::random_device rd; - static std::mt19937_64 engine(rd()); - static std::uniform_int_distribution dist(0, UINT64_MAX); - return {dist(engine)}; -} - BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(Uid uid) { std::pair key{curr_decl->address, uid.value}; auto lifter = this->bb_lifters.find(key); @@ -501,11 +494,7 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? - auto &cfg = this->curr_decl->cfg; - auto blk = std::find_if(cfg.begin(), cfg.end(), - [this](auto&& p) { return p.second.addr == this->func_address; }); - CHECK(blk != cfg.end()); - auto &entry_lifter = this->GetOrCreateBasicBlockLifter(blk->second.uid); + const auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->curr_decl->entry_uid); auto call_inst = entry_lifter.CallBasicBlockFunction( ir, lifted_func_st.state_ptr, abstract_stack, this->mem_ptr_ref); diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 900cd34a7..1e64b62c0 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -516,6 +516,7 @@ Result ProtobufTranslator::DecodeFunction( const ::specification::Function &function) const { FunctionDecl decl; decl.address = function.entry_address(); + decl.entry_uid = Uid{function.entry_uid()}; if (!function.has_callable()) { return std::string("all functions should have a callable"); From cadd2d9b2844dc1f143277d4f6d3161c5da067e6 Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Tue, 10 Oct 2023 16:50:25 -0400 Subject: [PATCH 09/10] Keep track of UID to CodeBlock mapping Useful for getting CodeBlock function addresses without a tracked function --- include/anvill/Specification.h | 3 +++ lib/Specification.cpp | 21 +++++++++++++++++++++ lib/Specification.h | 3 +++ 3 files changed, 27 insertions(+) diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index 00ea1f0e5..551dcff23 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -149,6 +149,9 @@ class Specification { // Return the function beginning at `address`, or an empty `shared_ptr`. std::shared_ptr FunctionAt(std::uint64_t address) const; + // Return the basic block at `uid`, or an empty `shared_ptr`. + std::shared_ptr BlockAt(Uid uid) const; + // Return the global variable beginning at `address`, or an empty `shared_ptr`. std::shared_ptr VariableAt(std::uint64_t address) const; diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 06a48f2e1..be97ebb55 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -68,6 +68,16 @@ SpecificationImpl::ParseSpecification( } auto func_ptr = new FunctionDecl(std::move(func_obj)); + + for (const auto& [uid, bb]: func_ptr->cfg) { + if (uid_to_block.count(uid)) { + std::stringstream ss; + ss << "Duplicate block Uid: " << uid.value; + return ss.str(); + } + uid_to_block[uid] = &bb; + } + functions.emplace_back(func_ptr); address_to_function.emplace(func_address, func_ptr); } @@ -409,6 +419,17 @@ Specification::FunctionAt(std::uint64_t address) const { } } +// Return the block with `uid`, or an empty `shared_ptr`. +std::shared_ptr +Specification::BlockAt(Uid uid) const { + auto it = impl->uid_to_block.find(uid); + if (it != impl->uid_to_block.end()) { + return std::shared_ptr(impl, it->second); + } else { + return {}; + } +} + // Return the global variable beginning at `address`, or an empty `shared_ptr`. std::shared_ptr Specification::VariableAt(std::uint64_t address) const { diff --git a/lib/Specification.h b/lib/Specification.h index 2be2f1ab2..07a3a2a33 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -65,6 +65,9 @@ class SpecificationImpl // List of functions that have been parsed from the JSON spec. std::unordered_map address_to_function; + // List of basic blocks that have been parsed from the JSON spec. + std::unordered_map uid_to_block; + // Inverted mapping of byte addresses to the variables containing those // addresses. std::unordered_map address_to_var; From 94600ac5131446f109fff857fd37f5e22e76bf60 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 12 Oct 2023 15:16:39 -0400 Subject: [PATCH 10/10] point to compatible irene --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 002fdbe8f..a1852e53c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -154,6 +154,7 @@ jobs: - name: Clone Ghidra Spec Generation uses: actions/checkout@v3 with: + ref: ekilmer/uid-codeblocks path: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 repository: "trailofbits/irene3" fetch-depth: 0