From b447d62299f3b69d5aebee6c4918bae8658dde7c Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 14 Mar 2024 13:41:45 +0300 Subject: [PATCH] Align loops --- ir_aarch64.dasc | 5 +++++ ir_cfg.c | 16 ++++++++++++++-- ir_private.h | 2 ++ ir_x86.dasc | 5 +++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index 08634a91..40904bd2 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -11,6 +11,8 @@ |.globals ir_lb |.section code, cold_code, rodata, jmp_table +|.define IR_LOOP_ALIGNMENT, 16 + #ifdef IR_DEBUG typedef struct _ir_mem {uint64_t v;} ir_mem; @@ -5700,6 +5702,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { continue; } + if (bb->flags & IR_BB_ALIGN_LOOP) { + | .align IR_LOOP_ALIGNMENT + } |=>b: i = bb->start; diff --git a/ir_cfg.c b/ir_cfg.c index 824cdb61..ce3dbeb2 100644 --- a/ir_cfg.c +++ b/ir_cfg.c @@ -2359,7 +2359,19 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx) #endif } - /* 5. Group chains according to the most frequent edge between them */ + /* 5. Align loop headers */ + for (b = 1; b <= ctx->cfg_blocks_count; b++) { + if (chains[b].head == b) { + bb = &ctx->cfg_blocks[b]; + if (bb->loop_depth) { + if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) { + bb->flags |= IR_BB_ALIGN_LOOP; + } + } + } + } + + /* 6. Group chains according to the most frequent edge between them */ // TODO: Try to find a better heuristic for (e = edges, i = edges_count; i > 0; e++, i--) { #if !IR_DEBUG_BB_SCHEDULE_GRAPH @@ -2380,7 +2392,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx) ir_dump_chains(ctx, chains); #endif - /* 6. Form a final BB order */ + /* 7. Form a final BB order */ count = 0; for (b = 1; b <= ctx->cfg_blocks_count; b++) { if (chains[b].head == b) { diff --git a/ir_private.h b/ir_private.h index 71a6400c..414cefe0 100644 --- a/ir_private.h +++ b/ir_private.h @@ -953,6 +953,8 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use); #define IR_BB_HAS_PARAM (1<<12) #define IR_BB_HAS_VAR (1<<13) +/* The following flags are set by BB scheduler */ +#define IR_BB_ALIGN_LOOP (1<<14) struct _ir_block { uint32_t flags; diff --git a/ir_x86.dasc b/ir_x86.dasc index 94c9b957..e2dbad7d 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -15,6 +15,8 @@ |.globals ir_lb |.section code, cold_code, rodata, jmp_table +|.define IR_LOOP_ALIGNMENT, 16 + #ifdef IR_DEBUG typedef struct _ir_mem {uint64_t v;} ir_mem; @@ -9742,6 +9744,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { continue; } + if (bb->flags & IR_BB_ALIGN_LOOP) { + | .align IR_LOOP_ALIGNMENT + } |=>b: i = bb->start;