From 148512596abb07d37d7fbd1aa546d031807a6091 Mon Sep 17 00:00:00 2001 From: Evan Teran Date: Sat, 23 Mar 2024 01:44:30 -0400 Subject: [PATCH] added the ability for the analysis to split basic blocks on call instructions. This has the effect that any block that has a call has exactly one call, and that call is the last instruction. This may seem useless at first, but it allows for simpler implementations of other analyses. For example, to determine whether a function is a no-return function, with this change we can simply look at all terminating blocks: if ALL of them end with a call to a no-return function, then the function is also a no-return function. (There are of course other cases that will need to be handled, but you get the gist.) --- include/BasicBlock.h | 3 ++ include/Function.h | 2 + plugins/Analyzer/Analyzer.cpp | 64 +++++++++++++++++++++++- plugins/Analyzer/Analyzer.h | 2 + plugins/FunctionFinder/DialogResults.cpp | 11 ++++ src/BasicBlock.cpp | 33 ++++++++++++ src/Function.cpp | 7 +++ 7 files changed, 120 insertions(+), 2 deletions(-) diff --git a/include/BasicBlock.h b/include/BasicBlock.h index 92117ad9d..8c9ebbd0b 100644 --- a/include/BasicBlock.h +++ b/include/BasicBlock.h @@ -81,6 +81,9 @@ class EDB_EXPORT BasicBlock { [[nodiscard]] edb::address_t firstAddress() const; [[nodiscard]] edb::address_t lastAddress() const; +public: + std::pair splitBlock(const instruction_pointer &inst); + private: std::vector instructions_; std::vector> references_; diff --git a/include/Function.h b/include/Function.h index d8e03c062..de5ba42ce 100644 --- a/include/Function.h +++ b/include/Function.h @@ -53,6 +53,8 @@ class EDB_EXPORT Function { [[nodiscard]] Type type() const; void setType(Type t); + void erase(const_iterator it); + public: [[nodiscard]] const_reference back() const; [[nodiscard]] const_reference front() const; diff --git a/plugins/Analyzer/Analyzer.cpp b/plugins/Analyzer/Analyzer.cpp index ca1c29032..2b6230a98 100644 --- a/plugins/Analyzer/Analyzer.cpp +++ 
b/plugins/Analyzer/Analyzer.cpp @@ -117,13 +117,21 @@ void set_function_types(IAnalyzer::FunctionMap *results) { Q_ASSERT(results); // give bonus if we have a symbol for the address - std::for_each(results->begin(), results->end(), [](Function &function) { + for (auto it = results->begin(); it != results->end(); ++it) { + + Function &function = it.value(); + + if (function.empty()) { + qDebug() << "HERE:" << it.key().toString(); + } + + Q_ASSERT(!function.empty()); if (is_thunk(function.entryAddress())) { function.setType(Function::Thunk); } else { function.setType(Function::Standard); } - }); + } } /** @@ -431,6 +439,56 @@ void Analyzer::identHeader(Analyzer::RegionData *data) { Q_UNUSED(data) } +bool split_function(Function &func) { + + for (auto bb_it = func.begin(); bb_it != func.end(); ++bb_it) { + BasicBlock &bb = bb_it->second; + + if (bb.size() <= 1) { + continue; + } + + for (auto it = bb.begin(); it != bb.end(); ++it) { + const std::shared_ptr &insn = *it; + + // if it's a call and not the last instruction of the BB + // then split! 
+ if (is_call(*insn) && insn != bb.back()) { + + auto newBlocks = bb.splitBlock(insn); + func.erase(bb_it); + + Q_ASSERT(!newBlocks.first.empty()); + Q_ASSERT(!newBlocks.second.empty()); + + func.insert(newBlocks.first); + func.insert(newBlocks.second); + + return true; + } + } + } + + return false; +} + +void Analyzer::splitBlocks(RegionData *data) { + Q_ASSERT(data); + + for (auto it = data->functions.begin(); it != data->functions.end(); ++it) { + const edb::address_t function = it.key(); + Function &func = it.value(); + + while (split_function(func)) { + continue; + } + } +} + +void Analyzer::computeNonReturning(Analyzer::RegionData *data) { + Q_UNUSED(data); +} + /** * @brief Analyzer::collectFunctions * @param data @@ -693,6 +751,8 @@ void Analyzer::analyze(const std::shared_ptr ®ion) { {"attempting to add marked functions to the list...", [this, ®ion_data]() { bonusMarkedFunctions(®ion_data); }}, {"attempting to collect functions with fuzzy analysis...", [this, ®ion_data]() { collectFuzzyFunctions(®ion_data); }}, {"collecting basic blocks...", [this, ®ion_data]() { collectFunctions(®ion_data); }}, + {"splitting basic blocks...", [this, ®ion_data]() { splitBlocks(®ion_data); }}, + {"computing non-returning functions...", [this, ®ion_data]() { computeNonReturning(®ion_data); }}, }; const int total_steps = sizeof(analysis_steps) / sizeof(analysis_steps[0]); diff --git a/plugins/Analyzer/Analyzer.h b/plugins/Analyzer/Analyzer.h index 1fa49d0f2..02db2e4f4 100644 --- a/plugins/Analyzer/Analyzer.h +++ b/plugins/Analyzer/Analyzer.h @@ -77,6 +77,8 @@ class Analyzer final : public QObject, public IAnalyzer, public IPlugin { void bonusMarkedFunctions(RegionData *data); void bonusSymbols(RegionData *data); void collectFunctions(RegionData *data); + void computeNonReturning(RegionData *data); + void splitBlocks(RegionData *data); void collectFuzzyFunctions(RegionData *data); void doAnalysis(const std::shared_ptr ®ion); void identHeader(Analyzer::RegionData *data); diff 
--git a/plugins/FunctionFinder/DialogResults.cpp b/plugins/FunctionFinder/DialogResults.cpp index 75e5199a9..cbd3727fc 100644 --- a/plugins/FunctionFinder/DialogResults.cpp +++ b/plugins/FunctionFinder/DialogResults.cpp @@ -135,6 +135,17 @@ DialogResults::DialogResults(QWidget *parent, Qt::WindowFlags f) } } } else if (is_terminator(inst)) { + } else { + // if the bb's last address is another block's first address + // connect them because they run into each other + + auto to = nodes.find(bb.lastAddress()); + if (to != nodes.end()) { + auto from = nodes.find(bb.firstAddress()); + if (to != nodes.end() && from != nodes.end()) { + new GraphEdge(from.value(), to.value(), Qt::blue); + } + } } } } diff --git a/src/BasicBlock.cpp b/src/BasicBlock.cpp index 2c2d9784b..2b10cba48 100644 --- a/src/BasicBlock.cpp +++ b/src/BasicBlock.cpp @@ -237,3 +237,36 @@ void BasicBlock::addReference(edb::address_t refsite, edb::address_t target) { std::vector> BasicBlock::references() const { return references_; } + +/** + * @brief BasicBlock::splitBlock + * @return + */ +std::pair BasicBlock::splitBlock(const instruction_pointer &inst) { + BasicBlock block1; + BasicBlock block2; + + auto it = begin(); + for (; it != end(); ++it) { + + block1.push_back(*it); + if (*it == inst) { + ++it; + break; + } + } + + for (; it != end(); ++it) { + block2.push_back(*it); + } + + for (auto it = references_.begin(); it != references_.end(); ++it) { + if (it->first >= block1.firstAddress() && it->first < block1.lastAddress()) { + block1.addReference(it->first, it->second); + } else { + block2.addReference(it->first, it->second); + } + } + + return std::make_pair(block1, block2); +} diff --git a/src/Function.cpp b/src/Function.cpp index df2cb4d65..bafa9f801 100644 --- a/src/Function.cpp +++ b/src/Function.cpp @@ -50,6 +50,7 @@ void Function::insert(BasicBlock &&bb) { * @return */ edb::address_t Function::entryAddress() const { + Q_ASSERT(!empty()); return front().firstAddress(); } @@ -58,6 +59,7 @@ 
edb::address_t Function::entryAddress() const { * @return */ edb::address_t Function::endAddress() const { + Q_ASSERT(!empty()); return back().lastAddress() - 1; } @@ -66,6 +68,7 @@ edb::address_t Function::endAddress() const { * @return */ edb::address_t Function::lastInstruction() const { + Q_ASSERT(!empty()); return back().back()->rva(); } @@ -214,3 +217,7 @@ Function::Type Function::type() const { void Function::setType(Type t) { type_ = t; } + +void Function::erase(const_iterator it) { + blocks_.erase(it); +}