From 1743ea52225a578bdc1369882b2740eb2ee026c2 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape <22630228+VedantParanjape@users.noreply.github.com> Date: Tue, 30 Jan 2024 14:54:14 +0530 Subject: [PATCH 1/2] [blocks] Add basic blocks for CFG representation of the AST This patch adds a basic_block class and a function to translate the AST to a CFG representation. It also dumps the basic blocks to std::cerr in builder_context.cpp for debugging. The algorithm that converts the AST to a CFG uses a worklist: it first creates basic blocks for all the top-level AST elements, and then iteratively expands these top-level elements, adding more basic blocks between them. bb1 ---> bb2 ==> bb1 ---> (bb-a1...bb-an) ---> bb2 It also pads if-statement blocks with an exit block. This makes it easier to handle loops, as the if block now has a single entry and a single exit. |--------| ----| |---- |--------| * buildit input source code dyn_var a = 0; for (dyn_var c = 0; c < 100; c = c + 3) { for (dyn_var b = 0; b < 10; b = b + 1) { a = a + b; } } * output of std::cerr, dump of the generated basic blocks ++++++ basic blocks ++++++ 0:decl0: ; 0 br decl1, 1:decl1: ; decl0, 0 br label2, 2:label2: ; decl1, goto13, 0 br if3, 3:if3: ; label2, 0 LT_EXPR VAR_EXPR VAR (var1) INT_CONST (100) br stmt8, stmtexit7, 4:stmt8: ; if3, 0 br decl9, 5:decl9: ; stmt8, 1 br label10, 6:label10: ; decl9, goto21, 1 br if11, 7:if11: ; label10, 1 LT_EXPR VAR_EXPR VAR (var2) INT_CONST (10) br stmt18, stmtexit17, 8:stmt18: ; if11, 1 br expr19, 9:expr19: ; stmt18, 2 br expr20, 10:expr20: ; expr19, 2 br goto21, 11:goto21: ; expr20, 2 br label10, 12:stmtexit17: ; if11, 1 br expr12, 13:expr12: ; stmtexit17, 1 br goto13, 14:goto13: ; expr12, 1 br label2, 15:stmtexit7: ; if3, 0 br ++++++ basic blocks ++++++ --- include/blocks/basic_blocks.h | 31 +++++ include/builder/builder_context.h | 1 + src/blocks/basic_blocks.cpp | 210 ++++++++++++++++++++++++++++++ src/builder/builder_context.cpp | 33 ++++- 4 
files changed, 273 insertions(+), 2 deletions(-) create mode 100644 include/blocks/basic_blocks.h create mode 100644 src/blocks/basic_blocks.cpp diff --git a/include/blocks/basic_blocks.h b/include/blocks/basic_blocks.h new file mode 100644 index 0000000..ee4f6a9 --- /dev/null +++ b/include/blocks/basic_blocks.h @@ -0,0 +1,31 @@ +#ifndef BASIC_BLOCKS_H +#define BASIC_BLOCKS_H +#include "blocks/stmt.h" +#include +#include +#include +#include + +class basic_block { + public: + typedef std::vector> cfg_block; + basic_block(std::string label): name(label) {}; + + cfg_block predecessor; + cfg_block successor; + block::expr::Ptr branch_expr; + std::shared_ptr then_branch; + std::shared_ptr else_branch; + std::shared_ptr exit_block; + bool is_exit_block = false; + block::stmt::Ptr parent; + unsigned int ast_index; + unsigned int ast_depth; + unsigned int id; + std::string name; + static std::map> ast_to_basic_block_map; +}; + +basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast); + +#endif \ No newline at end of file diff --git a/include/builder/builder_context.h b/include/builder/builder_context.h index 840fbca..92470d0 100644 --- a/include/builder/builder_context.h +++ b/include/builder/builder_context.h @@ -1,5 +1,6 @@ #ifndef BUILDER_CONTEXT #define BUILDER_CONTEXT +#include "blocks/basic_blocks.h" #include "blocks/expr.h" #include "blocks/stmt.h" #include "builder/forward_declarations.h" diff --git a/src/blocks/basic_blocks.cpp b/src/blocks/basic_blocks.cpp new file mode 100644 index 0000000..90b41b8 --- /dev/null +++ b/src/blocks/basic_blocks.cpp @@ -0,0 +1,210 @@ +#include "blocks/basic_blocks.h" +#include + +using namespace block; +std::map> basic_block::ast_to_basic_block_map = {}; + +basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { + std::deque> work_list; + basic_block::cfg_block return_list; + int basic_block_count = 0; + + // step 1: fill the work_list + unsigned int ast_index_counter = 0; + for (auto st: 
ast->stmts) { + auto bb = std::make_shared(std::to_string(basic_block_count)); + bb->parent = st; + bb->ast_index = ast_index_counter++; + bb->ast_depth = 0; + work_list.push_back(bb); + basic_block_count++; + } + + // step 2: add successors + for (unsigned i = 0; work_list.size() != 0 && i < work_list.size() - 1; i++) { + work_list[i]->successor.push_back(work_list[i+1]); + } + + // step 3: process blocks: every xx_stmt type statement is made out into a basic block + while (work_list.size()) { + auto bb = work_list.front(); + + if (isa(bb->parent)) { + ast_index_counter = 0; + stmt_block::Ptr stmt_block_ = to(bb->parent); + bb->name = "stmt" + bb->name; + + if (stmt_block_->stmts.size() > 0) { + basic_block::cfg_block stmt_block_list; + + // convert all statements of this stmt_block into a basic block + for (auto st: stmt_block_->stmts) { + stmt_block_list.push_back(std::make_shared(std::to_string(basic_block_count++))); + stmt_block_list.back()->parent = st; + stmt_block_list.back()->ast_index = ast_index_counter++; + stmt_block_list.back()->ast_depth = bb->ast_depth + 1; + } + + // set the basic block successors + for (unsigned i = 0; stmt_block_list.size() != 0 && i < stmt_block_list.size() - 1; i++) { + stmt_block_list[i]->successor.push_back(stmt_block_list[i+1]); + } + + // since we insert these stmts between bb1 ---> bb2 ==> bb1 ---> (bb-a1...bb-an) ---> bb2 + // point the successor of the stmt_block_list to the basic block that bb1's successor + // pointed to. After this, clear the bb1's successor and push the front of stmt_block_list + // to bb1's successor list. + stmt_block_list.back()->successor.push_back(bb->successor.front()); + bb->successor.clear(); + bb->successor.push_back(stmt_block_list.front()); + + // push a rather empty-ish basic block, which will branch to the next basic block, or the next statement. 
+ return_list.push_back(bb); + work_list.pop_front(); + // now insert the pending blocks to be processed at the front of the work_list + work_list.insert(work_list.begin(), stmt_block_list.begin(), stmt_block_list.end()); + } + else { + return_list.push_back(bb); + work_list.pop_front(); + } + } + else if (isa(bb->parent)) { + bb->name = "if" + bb->name; + + if_stmt::Ptr if_stmt_ = to(bb->parent); + // assign the if condition to the basic block + bb->branch_expr = if_stmt_->cond; + + // create a exit block + auto exit_bb = std::make_shared("exit" + std::to_string(basic_block_count)); + // assign it a empty stmt_block as parent + exit_bb->parent = std::make_shared(); + // mark the basic block as exit block + exit_bb->is_exit_block = true; + // set the ast depth of the basic block + exit_bb->ast_depth = bb->ast_depth; + // check if this is the last block, if yes the successor will be empty + if (bb->successor.size()) { + // set the successor to the block that if_stmt successor pointer to earlier + exit_bb->successor.push_back(bb->successor.front()); + // clear the successor block from the if_stmt + bb->successor.clear(); + } + // remove the if from the work_list + work_list.pop_front(); + // push the exit block to the work_list + work_list.push_front(exit_bb); + std::cerr << "inside if handler: " << bb->name << "\n"; + // if there is a then_stmt, create a basic block for it + if (to(if_stmt_->then_stmt)->stmts.size() != 0) { + auto then_bb = std::make_shared(std::to_string(++basic_block_count)); + // set the parent of this block as the then stmts + then_bb->parent = if_stmt_->then_stmt; + // set the ast depth of the basic block + then_bb->ast_depth = bb->ast_depth; + // set the successor of this block to be the exit block + then_bb->successor.push_back(exit_bb); + // set the successor of the original if_stmt block to be this then block + bb->successor.push_back(then_bb); + // set the then branch ptr + bb->then_branch = then_bb; + // push the block to the work_list, 
to expand it further + work_list.push_front(then_bb); + std::cerr << "inside then" << "\n"; + } + // if there is a else_stmt, create a basic block for it + if (to(if_stmt_->else_stmt)->stmts.size() != 0) { + auto else_bb = std::make_shared(std::to_string(++basic_block_count)); + // set the parent of this block as the else stmts + else_bb->parent = if_stmt_->else_stmt; + // set the ast depth of the basic block + else_bb->ast_depth = bb->ast_depth; + // set the successor of this block to be the exit block + else_bb->successor.push_back(exit_bb); + // set the successor of the orignal if_stmt block to be this else block + bb->successor.push_back(else_bb); + // set the else branch ptr + bb->else_branch = else_bb; + // push the block to the work_list, to expand it further + work_list.insert(work_list.begin() + 1, else_bb); + std::cerr << "inside else" << "\n"; + } + + // if there is no then/else block, then have the exit block as successor as well. + if (bb->successor.size() <= 1) bb->successor.push_back(exit_bb); + + // set the missing block as the exit block + if (!bb->then_branch) bb->then_branch = exit_bb; + else if (!bb->else_branch) bb->else_branch = exit_bb; + + // set the exit block of this if stmt + bb->exit_block = exit_bb; + + return_list.push_back(bb); + } + else if (isa(bb->parent)) { + bb->name = "expr" + bb->name; + return_list.push_back(bb); + work_list.pop_front(); + } + else if (isa(bb->parent)) { + bb->name = "decl" + bb->name; + return_list.push_back(bb); + work_list.pop_front(); + } + else if (isa(bb->parent)) { + bb->name = "label" + bb->name; + return_list.push_back(bb); + work_list.pop_front(); + } + else if (isa(bb->parent)) { + bb->name = "goto" + bb->name; + return_list.push_back(bb); + work_list.pop_front(); + } + else if (isa(bb->parent)) { + bb->name = "return" + bb->name; + return_list.push_back(bb); + work_list.pop_front(); + } + + basic_block_count++; + } + + // step 4: resolve goto calls to successors of labels + for (auto bb: 
return_list) { + if (isa(bb->parent)) { + auto goto_source = std::find_if(return_list.begin(), return_list.end(), + [bb](std::shared_ptr bb_l) { + if (isa(bb_l->parent)) { + return to(bb_l->parent)->label1 == to(bb->parent)->label1; + } + return false; + }); + if (goto_source != return_list.end()) { + bb->successor.clear(); + bb->successor.push_back(*goto_source); + } + } + } + + // step 5: populate the predecessors + for (auto bb: return_list) { + for (auto succ: bb->successor) { + succ->predecessor.push_back(bb); + } + } + + // step 6: assign each basic_block an id + for (unsigned int i = 0; i < return_list.size(); i++) { + return_list[i]->id = i; + } + + // step 7: populate the ast -> bb map + for (auto bb: return_list) { + bb->ast_to_basic_block_map[bb->parent] = bb; + } + + return return_list; +} \ No newline at end of file diff --git a/src/builder/builder_context.cpp b/src/builder/builder_context.cpp index 0d47559..81211f3 100644 --- a/src/builder/builder_context.cpp +++ b/src/builder/builder_context.cpp @@ -306,6 +306,28 @@ block::stmt::Ptr builder_context::extract_ast_from_function_impl(void) { if (feature_unstructured) return ast; + basic_block::cfg_block BBs = generate_basic_blocks(block::to(ast)); + std::cerr << "++++++ basic blocks ++++++ \n"; + for (auto bb: BBs) { + std::cerr << bb->id << ":" << bb->name << ":" << " ; "; + for (auto pred: bb->predecessor) { + std::cerr << pred->name << ", "; + } + std::cerr << bb->ast_depth; + std::cerr << "\n"; + if (bb->branch_expr) { + std::cerr << " "; + bb->branch_expr->dump(std::cerr, 0); + } + std::cerr << " "; + std::cerr << "br "; + for (auto branches: bb->successor) { + std::cerr << branches->name << ", "; + } + std::cerr << "\n"; + } + std::cerr << "++++++ basic blocks ++++++ \n"; + block::loop_finder finder; finder.ast = ast; ast->accept(&finder); @@ -405,7 +427,6 @@ block::stmt::Ptr builder_context::extract_ast_from_function_internal(std::vector ret_ast = ast; } catch (LoopBackException &e) { 
current_builder_context = nullptr; - block::goto_stmt::Ptr goto_stmt = std::make_shared(); goto_stmt->static_offset.clear(); goto_stmt->temporary_label_number = e.static_offset; @@ -421,7 +442,15 @@ block::stmt::Ptr builder_context::extract_ast_from_function_internal(std::vector add_stmt_to_current_block(goto_stmt, false); } else { for (unsigned int i = e.child_id; i < e.parent->stmts.size(); i++) { - add_stmt_to_current_block(e.parent->stmts[i], false); + if (isa(e.parent->stmts[i])) { + block::goto_stmt::Ptr goto_stmt = std::make_shared(); + goto_stmt->static_offset.clear(); + goto_stmt->temporary_label_number = to(e.parent->stmts[i])->temporary_label_number; + add_stmt_to_current_block(goto_stmt, false); + } + else { + add_stmt_to_current_block(e.parent->stmts[i], false); + } } } ret_ast = ast; From cce357e59803ebfa0233c84bad0da7b7ee518cc8 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape <22630228+VedantParanjape@users.noreply.github.com> Date: Sat, 10 Feb 2024 19:16:56 +0530 Subject: [PATCH 2/2] [blocks] Move basic_blocks to block namespace and a few other changes This patch also moves the debug dump into a separate function, dump(), which generate_basic_blocks() calls when BASIC_BLOCK_DEBUG is defined, instead of printing from builder_context.cpp. It also cleans up stray debug statements. 
--- include/blocks/basic_blocks.h | 11 ++++--- src/blocks/basic_blocks.cpp | 53 ++++++++++++++++++++++++--------- src/builder/builder_context.cpp | 26 ++-------------- 3 files changed, 49 insertions(+), 41 deletions(-) diff --git a/include/blocks/basic_blocks.h b/include/blocks/basic_blocks.h index ee4f6a9..f227a87 100644 --- a/include/blocks/basic_blocks.h +++ b/include/blocks/basic_blocks.h @@ -6,6 +6,7 @@ #include #include +namespace block { class basic_block { public: typedef std::vector> cfg_block; @@ -13,19 +14,21 @@ class basic_block { cfg_block predecessor; cfg_block successor; - block::expr::Ptr branch_expr; + expr::Ptr branch_expr; std::shared_ptr then_branch; std::shared_ptr else_branch; std::shared_ptr exit_block; bool is_exit_block = false; - block::stmt::Ptr parent; + stmt::Ptr parent; unsigned int ast_index; unsigned int ast_depth; unsigned int id; std::string name; - static std::map> ast_to_basic_block_map; + static std::map> ast_to_basic_block_map; }; -basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast); +basic_block::cfg_block generate_basic_blocks(stmt_block::Ptr ast); +void dump(basic_block::cfg_block basic_block_list); +} // namespace block #endif \ No newline at end of file diff --git a/src/blocks/basic_blocks.cpp b/src/blocks/basic_blocks.cpp index 90b41b8..f722b45 100644 --- a/src/blocks/basic_blocks.cpp +++ b/src/blocks/basic_blocks.cpp @@ -1,10 +1,10 @@ #include "blocks/basic_blocks.h" #include -using namespace block; -std::map> basic_block::ast_to_basic_block_map = {}; +namespace block { +std::map> basic_block::ast_to_basic_block_map = {}; -basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { +basic_block::cfg_block generate_basic_blocks(stmt_block::Ptr ast) { std::deque> work_list; basic_block::cfg_block return_list; int basic_block_count = 0; @@ -29,7 +29,7 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { while (work_list.size()) { auto bb = work_list.front(); - if 
(isa(bb->parent)) { + if (isa(bb->parent)) { ast_index_counter = 0; stmt_block::Ptr stmt_block_ = to(bb->parent); bb->name = "stmt" + bb->name; @@ -95,7 +95,6 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { work_list.pop_front(); // push the exit block to the work_list work_list.push_front(exit_bb); - std::cerr << "inside if handler: " << bb->name << "\n"; // if there is a then_stmt, create a basic block for it if (to(if_stmt_->then_stmt)->stmts.size() != 0) { auto then_bb = std::make_shared(std::to_string(++basic_block_count)); @@ -111,7 +110,6 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { bb->then_branch = then_bb; // push the block to the work_list, to expand it further work_list.push_front(then_bb); - std::cerr << "inside then" << "\n"; } // if there is a else_stmt, create a basic block for it if (to(if_stmt_->else_stmt)->stmts.size() != 0) { @@ -128,7 +126,6 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { bb->else_branch = else_bb; // push the block to the work_list, to expand it further work_list.insert(work_list.begin() + 1, else_bb); - std::cerr << "inside else" << "\n"; } // if there is no then/else block, then have the exit block as successor as well. 
@@ -143,27 +140,27 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { return_list.push_back(bb); } - else if (isa(bb->parent)) { + else if (isa(bb->parent)) { bb->name = "expr" + bb->name; return_list.push_back(bb); work_list.pop_front(); } - else if (isa(bb->parent)) { + else if (isa(bb->parent)) { bb->name = "decl" + bb->name; return_list.push_back(bb); work_list.pop_front(); } - else if (isa(bb->parent)) { + else if (isa(bb->parent)) { bb->name = "label" + bb->name; return_list.push_back(bb); work_list.pop_front(); } - else if (isa(bb->parent)) { + else if (isa(bb->parent)) { bb->name = "goto" + bb->name; return_list.push_back(bb); work_list.pop_front(); } - else if (isa(bb->parent)) { + else if (isa(bb->parent)) { bb->name = "return" + bb->name; return_list.push_back(bb); work_list.pop_front(); @@ -174,7 +171,7 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { // step 4: resolve goto calls to successors of labels for (auto bb: return_list) { - if (isa(bb->parent)) { + if (isa(bb->parent)) { auto goto_source = std::find_if(return_list.begin(), return_list.end(), [bb](std::shared_ptr bb_l) { if (isa(bb_l->parent)) { @@ -206,5 +203,33 @@ basic_block::cfg_block generate_basic_blocks(block::stmt_block::Ptr ast) { bb->ast_to_basic_block_map[bb->parent] = bb; } + // print debug logs +#ifdef BASIC_BLOCK_DEBUG + dump(return_list); +#endif return return_list; -} \ No newline at end of file +} + +void dump(basic_block::cfg_block basic_block_list) { + std::cerr << "++++++ basic blocks ++++++ \n"; + for (auto bb: basic_block_list) { + std::cerr << bb->id << ":" << bb->name << ":" << " ; "; + for (auto pred: bb->predecessor) { + std::cerr << pred->name << ", "; + } + std::cerr << bb->ast_depth; + std::cerr << "\n"; + if (bb->branch_expr) { + std::cerr << " "; + bb->branch_expr->dump(std::cerr, 0); + } + std::cerr << " "; + std::cerr << "br "; + for (auto branches: bb->successor) { + std::cerr << branches->name << ", "; + 
} + std::cerr << "\n"; + } + std::cerr << "++++++ basic blocks ++++++ \n"; +} +} // namespace block diff --git a/src/builder/builder_context.cpp b/src/builder/builder_context.cpp index 81211f3..94cc2b9 100644 --- a/src/builder/builder_context.cpp +++ b/src/builder/builder_context.cpp @@ -306,27 +306,7 @@ block::stmt::Ptr builder_context::extract_ast_from_function_impl(void) { if (feature_unstructured) return ast; - basic_block::cfg_block BBs = generate_basic_blocks(block::to(ast)); - std::cerr << "++++++ basic blocks ++++++ \n"; - for (auto bb: BBs) { - std::cerr << bb->id << ":" << bb->name << ":" << " ; "; - for (auto pred: bb->predecessor) { - std::cerr << pred->name << ", "; - } - std::cerr << bb->ast_depth; - std::cerr << "\n"; - if (bb->branch_expr) { - std::cerr << " "; - bb->branch_expr->dump(std::cerr, 0); - } - std::cerr << " "; - std::cerr << "br "; - for (auto branches: bb->successor) { - std::cerr << branches->name << ", "; - } - std::cerr << "\n"; - } - std::cerr << "++++++ basic blocks ++++++ \n"; + block::basic_block::cfg_block BBs = generate_basic_blocks(block::to(ast)); block::loop_finder finder; finder.ast = ast; @@ -442,10 +422,10 @@ block::stmt::Ptr builder_context::extract_ast_from_function_internal(std::vector add_stmt_to_current_block(goto_stmt, false); } else { for (unsigned int i = e.child_id; i < e.parent->stmts.size(); i++) { - if (isa(e.parent->stmts[i])) { + if (block::isa(e.parent->stmts[i])) { block::goto_stmt::Ptr goto_stmt = std::make_shared(); goto_stmt->static_offset.clear(); - goto_stmt->temporary_label_number = to(e.parent->stmts[i])->temporary_label_number; + goto_stmt->temporary_label_number = block::to(e.parent->stmts[i])->temporary_label_number; add_stmt_to_current_block(goto_stmt, false); } else {