Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UID codeblocks refactor #396

Merged
merged 15 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion data_specifications/specification.proto
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,13 @@ message BlockContext {
// Describes one code block of a function. A block is identified by its
// `uid`; the edge lists below refer to neighbouring blocks by that uid.
message CodeBlock {
uint64 address = 1;
string name = 2;
// Incoming block(s) by uid
repeated uint64 incoming_blocks = 3;
// Outgoing block(s) by uid
repeated uint64 outgoing_blocks = 4;
uint32 size = 5;
// NOTE(review): presumably the decoding-context assignments (name -> value)
// that hold at the entry point of this block -- confirm against the consumer.
map<string, uint64> context_assignments = 6;
// Unique ID of this block; this is the value stored in
// `incoming_blocks` / `outgoing_blocks` of other blocks.
uint64 uid = 7;
}

message Variables {
Expand Down Expand Up @@ -314,10 +317,11 @@ message Function {
uint64 entry_address = 1;
FunctionLinkage func_linkage = 3;
Callable callable = 4;
// Mapping of unique ID to codeblock
map<uint64, CodeBlock> blocks = 5;
map<string, Variable> local_variables = 6;

// Keys are addresses of code blocks, each block
// Keys are unique IDs of code blocks, each block
// may have a corresponding context
map<uint64, BlockContext> block_context = 7;
StackEffects stack_effects = 8;
Expand Down
2 changes: 1 addition & 1 deletion include/anvill/ABI.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ extern const std::string kAnvillStackZero;
// use this to queue off of then just move it after the split
extern const std::string kStackMetadata;

extern const std::string kBasicBlockMetadata;
extern const std::string kBasicBlockUidMetadata;


/// Intrinsic that acts like a return instruction but leaves both the basic block and the parent function.
Expand Down
37 changes: 27 additions & 10 deletions include/anvill/Declarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
Expand Down Expand Up @@ -45,15 +46,31 @@ struct Register;
} // namespace remill
namespace anvill {

// Unique identifier of a code block, wrapped in its own type so that it is
// distinct from a plain 64-bit integer such as a block address.
struct Uid {
// The raw 64-bit identifier.
std::uint64_t value;
// Compiler-generated member-wise equality (compares `value`).
bool operator==(const Uid &) const = default;
};

}

// Hash support for `anvill::Uid`, so it can be used as the key of
// `std::unordered_map` / `std::unordered_set`. The hash of a `Uid` is the
// standard hash of its wrapped 64-bit value.
//
// The integer types are spelled with the `std::` qualifier: `<cstddef>` and
// `<cstdint>` only guarantee the names inside namespace `std`.
template <>
struct std::hash<anvill::Uid> {
  std::size_t operator()(const anvill::Uid &uid) const noexcept {
    return std::hash<std::uint64_t>{}(uid.value);
  }
};

namespace anvill {

struct CodeBlock {
uint64_t addr;
uint32_t size;
std::unordered_set<uint64_t> outgoing_edges;
std::unordered_set<Uid> outgoing_edges;
// The set of context assignments that occur at the entry point to this block.
// A block may have specific decoding context properties such as "TM=1" (the thumb bit is set)
// So we declare the context assignments that occur at the entry point to a block.
std::unordered_map<std::string, std::uint64_t> context_assignments;
Uid uid;
};


Expand Down Expand Up @@ -400,24 +417,24 @@ struct FunctionDecl : public CallableDecl {
bool is_extern{false};

// These are the blocks contained within the function representing the CFG.
std::unordered_map<std::uint64_t, CodeBlock> cfg;
std::unordered_map<Uid, CodeBlock> cfg;

std::unordered_map<std::string, ParameterDecl> locals;

std::unordered_map<std::uint64_t, SpecStackOffsets> stack_offsets_at_entry;
std::unordered_map<Uid, SpecStackOffsets> stack_offsets_at_entry;

std::unordered_map<std::uint64_t, SpecStackOffsets> stack_offsets_at_exit;
std::unordered_map<Uid, SpecStackOffsets> stack_offsets_at_exit;

std::unordered_map<std::uint64_t, std::vector<ParameterDecl>>
std::unordered_map<Uid, std::vector<ParameterDecl>>
live_regs_at_entry;

std::unordered_map<std::uint64_t, std::vector<ParameterDecl>>
std::unordered_map<Uid, std::vector<ParameterDecl>>
live_regs_at_exit;

std::unordered_map<std::uint64_t, std::vector<ConstantDomain>>
std::unordered_map<Uid, std::vector<ConstantDomain>>
constant_values_at_entry;

std::unordered_map<std::uint64_t, std::vector<ConstantDomain>>
std::unordered_map<Uid, std::vector<ConstantDomain>>
constant_values_at_exit;

// sorted vector of hints
Expand Down Expand Up @@ -451,10 +468,10 @@ struct FunctionDecl : public CallableDecl {
static Result<FunctionDecl, std::string> Create(llvm::Function &func,
const remill::Arch *arch);

SpecBlockContext GetBlockContext(std::uint64_t addr) const;
SpecBlockContext GetBlockContext(Uid uid) const;

void
AddBBContexts(std::unordered_map<uint64_t, SpecBlockContext> &contexts) const;
AddBBContexts(std::unordered_map<Uid, SpecBlockContext> &contexts) const;
};

// A call site decl, as represented at a "near ABI" level. This is like a
Expand Down
8 changes: 4 additions & 4 deletions include/anvill/Passes/BasicBlockPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace anvill {
class BasicBlockContexts {
public:
virtual std::optional<std::reference_wrapper<const BasicBlockContext>>
GetBasicBlockContextForAddr(uint64_t addr) const = 0;
GetBasicBlockContextForUid(Uid uid) const = 0;
virtual const FunctionDecl &GetFunctionAtAddress(uint64_t addr) const = 0;
};

Expand All @@ -33,9 +33,9 @@ class BasicBlockPass : public llvm::PassInfoMixin<BasicBlockPass<T>> {
llvm::PreservedAnalyses run(llvm::Function &F,
llvm::FunctionAnalysisManager &AM) {
auto &bb_pass = *static_cast<T *>(this);
auto bbaddr = anvill::GetBasicBlockAddr(&F);
if (bbaddr.has_value()) {
auto maybe_bb_cont = contexts.GetBasicBlockContextForAddr(*bbaddr);
auto bbuid = anvill::GetBasicBlockUid(&F);
if (bbuid.has_value()) {
auto maybe_bb_cont = contexts.GetBasicBlockContextForUid(*bbuid);
if (maybe_bb_cont) {
const BasicBlockContext &bb_cont = *maybe_bb_cont;
auto &parent_func =
Expand Down
4 changes: 2 additions & 2 deletions include/anvill/Specification.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,14 @@ struct ValueDecl;

class Specification;
class SpecBlockContexts : public BasicBlockContexts {
std::unordered_map<uint64_t, SpecBlockContext> contexts;
std::unordered_map<Uid, SpecBlockContext> contexts;
std::unordered_map<uint64_t, std::shared_ptr<const FunctionDecl>> funcs;

public:
SpecBlockContexts(const Specification &spec);

virtual std::optional<std::reference_wrapper<const BasicBlockContext>>
GetBasicBlockContextForAddr(uint64_t addr) const override;
GetBasicBlockContextForUid(Uid uid) const override;

virtual const FunctionDecl &
GetFunctionAtAddress(uint64_t addr) const override;
Expand Down
2 changes: 1 addition & 1 deletion include/anvill/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl,
llvm::BasicBlock *in_block,
llvm::Value *state_ptr, llvm::Value *mem_ptr);

std::optional<uint64_t> GetBasicBlockAddr(llvm::Function *func);
std::optional<Uid> GetBasicBlockUid(llvm::Function *func);

llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func);

Expand Down
2 changes: 1 addition & 1 deletion lib/ABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ const std::string kAnvillDataProvenanceFunc(kAnvillNamePrefix +
// `alloca`.
const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero");

const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md");
const std::string kBasicBlockUidMetadata(kAnvillNamePrefix + "basic_block_uid_md");

const std::string kStackMetadata(kAnvillNamePrefix + "stack_alloc");

Expand Down
26 changes: 13 additions & 13 deletions lib/Declarations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ VariableDecl::DeclareInModule(const std::string &name,
}

void FunctionDecl::AddBBContexts(
std::unordered_map<uint64_t, SpecBlockContext> &contexts) const {
for (const auto &[addr, _] : this->cfg) {
contexts.insert({addr, this->GetBlockContext(addr)});
std::unordered_map<Uid, SpecBlockContext> &contexts) const {
for (const auto &[uid, _] : this->cfg) {
contexts.insert({uid, this->GetBlockContext(uid)});
}
}

Expand Down Expand Up @@ -475,29 +475,29 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() {

namespace {
template <class V>
V GetWithDef(uint64_t addr, const std::unordered_map<uint64_t, V> &map, V def) {
if (map.find(addr) == map.end()) {
V GetWithDef(Uid uid, const std::unordered_map<Uid, V> &map, V def) {
if (map.find(uid) == map.end()) {
return def;
}

return map.find(addr)->second;
return map.find(uid)->second;
}
} // namespace

size_t FunctionDecl::GetPointerDisplacement() const {
return this->parameter_size + this->parameter_offset;
}

SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const {
SpecBlockContext FunctionDecl::GetBlockContext(Uid uid) const {
return SpecBlockContext(
*this, GetWithDef(addr, this->stack_offsets_at_entry, SpecStackOffsets()),
GetWithDef(addr, this->stack_offsets_at_exit, SpecStackOffsets()),
GetWithDef(addr, this->constant_values_at_entry,
*this, GetWithDef(uid, this->stack_offsets_at_entry, SpecStackOffsets()),
GetWithDef(uid, this->stack_offsets_at_exit, SpecStackOffsets()),
GetWithDef(uid, this->constant_values_at_entry,
std::vector<ConstantDomain>()),
GetWithDef(addr, this->constant_values_at_exit,
GetWithDef(uid, this->constant_values_at_exit,
std::vector<ConstantDomain>()),
GetWithDef(addr, this->live_regs_at_entry, std::vector<ParameterDecl>()),
GetWithDef(addr, this->live_regs_at_exit, std::vector<ParameterDecl>()));
GetWithDef(uid, this->live_regs_at_entry, std::vector<ParameterDecl>()),
GetWithDef(uid, this->live_regs_at_exit, std::vector<ParameterDecl>()));
}

std::optional<size_t>
Expand Down
23 changes: 14 additions & 9 deletions lib/Lifters/BasicBlockLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,13 +402,16 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() {
}


llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const {
llvm::MDNode *BasicBlockLifter::GetBasicBlockAddrAnnotation(uint64_t addr) const {
ekilmer marked this conversation as resolved.
Show resolved Hide resolved
return this->GetAddrAnnotation(addr, this->semantics_module->getContext());
}
// Returns the metadata node that encodes `uid`, created in the LLVM context
// of the semantics module.
llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(Uid uid) const {
  auto &semantics_context = this->semantics_module->getContext();
  return this->GetUidAnnotation(uid, semantics_context);
}

llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() {
std::string name_ = "func" + std::to_string(decl.address) + "basic_block" +
std::to_string(this->block_def.addr);
std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid.value);
auto &context = this->semantics_module->getContext();
llvm::FunctionType *lifted_func_type =
llvm::dyn_cast<llvm::FunctionType>(remill::RecontextualizeType(
Expand Down Expand Up @@ -437,8 +440,8 @@ llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() {

BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() {
auto func = bb_func;
func->setMetadata(anvill::kBasicBlockMetadata,
GetBasicBlockAnnotation(this->block_def.addr));
func->setMetadata(anvill::kBasicBlockUidMetadata,
GetBasicBlockUidAnnotation(this->block_def.uid));

auto &context = this->semantics_module->getContext();
llvm::FunctionType *lifted_func_type =
Expand Down Expand Up @@ -629,21 +632,23 @@ void BasicBlockLifter::TerminateBasicBlockFunction(
auto pc = ir.CreateLoad(address_type, bbfunc.next_pc_out);
auto sw = ir.CreateSwitch(pc, this->invalid_successor_block);

for (auto e : this->block_def.outgoing_edges) {
auto succ_const = llvm::ConstantInt::get(
llvm::cast<llvm::IntegerType>(this->address_type), e);

for (auto edge_uid : this->block_def.outgoing_edges) {
auto calling_bb =
llvm::BasicBlock::Create(next_mem->getContext(), "", bbfunc.func);
llvm::IRBuilder<> calling_bb_builder(calling_bb);
auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(e);
auto edge_bb = this->decl.cfg.find(edge_uid);
CHECK(edge_bb != this->decl.cfg.end());
auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(edge_bb->second.uid);
auto retval = child_lifter.ControlFlowCallBasicBlockFunction(
caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem);
if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) {
calling_bb_builder.CreateRetVoid();
} else {
calling_bb_builder.CreateRet(retval);
}

auto succ_const = llvm::ConstantInt::get(
llvm::cast<llvm::IntegerType>(this->address_type), edge_bb->second.addr);
sw->addCase(succ_const, calling_bb);
}

Expand Down
3 changes: 2 additions & 1 deletion lib/Lifters/BasicBlockLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ class BasicBlockLifter : public CodeLifter {
remill::DecodingContext context);


llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const;
llvm::MDNode *GetBasicBlockAddrAnnotation(uint64_t addr) const;
llvm::MDNode *GetBasicBlockUidAnnotation(Uid uid) const;

public:
BasicBlockLifter(std::unique_ptr<BasicBlockContext> block_context,
Expand Down
11 changes: 11 additions & 0 deletions lib/Lifters/CodeLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <unordered_set>

#include "anvill/Declarations.h"

namespace anvill {
namespace {
// Clear out LLVM variable names. They're usually not helpful.
Expand Down Expand Up @@ -57,6 +59,7 @@ CodeLifter::CodeLifter(const LifterOptions &options,
type_specifier(type_specifier),
address_type(
llvm::Type::getIntNTy(llvm_context, options.arch->address_size)),
uid_type(llvm::Type::getInt64Ty(llvm_context)),
i8_type(llvm::Type::getInt8Ty(llvm_context)),
i8_zero(llvm::Constant::getNullValue(i8_type)),
i32_type(llvm::Type::getInt32Ty(llvm_context)),
Expand Down Expand Up @@ -191,6 +194,14 @@ llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr,
return llvm::MDNode::get(context, pc_md);
}

// Wraps `uid` in a metadata node: the raw value becomes an integer constant
// of `uid_type` (re-created inside `context`), which is then turned into
// value metadata and placed in a single-operand `MDNode`.
llvm::MDNode *CodeLifter::GetUidAnnotation(Uid uid,
                                           llvm::LLVMContext &context) const {
  auto context_uid_type = remill::RecontextualizeType(uid_type, context);
  auto uid_constant = llvm::ConstantInt::get(context_uid_type, uid.value);
  return llvm::MDNode::get(context, llvm::ValueAsMetadata::get(uid_constant));
}

// Allocate and initialize the state structure.
llvm::Value *
CodeLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block,
Expand Down
4 changes: 4 additions & 0 deletions lib/Lifters/CodeLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <remill/BC/InstructionLifter.h>
#include <remill/BC/IntrinsicTable.h>

#include "anvill/Declarations.h"
#include "anvill/Lifters.h"

namespace anvill {
Expand Down Expand Up @@ -45,6 +46,7 @@ class CodeLifter {
const TypeProvider &type_provider;
const TypeTranslator &type_specifier;
llvm::IntegerType *const address_type;
llvm::IntegerType *const uid_type;


// Convenient to keep around.
Expand Down Expand Up @@ -78,6 +80,8 @@ class CodeLifter {

llvm::MDNode *GetAddrAnnotation(uint64_t addr,
llvm::LLVMContext &context) const;
llvm::MDNode *GetUidAnnotation(Uid uid,
llvm::LLVMContext &context) const;

public:
CodeLifter(const LifterOptions &options, llvm::Module *semantics_module,
Expand Down
Loading
Loading