Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use PUSH/POP to save/restore persistent registers in prologue/epilogue #76

Merged
merged 3 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions ir_aarch64.dasc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 8

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

Expand Down Expand Up @@ -5700,6 +5702,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
continue;
}
if (bb->flags & IR_BB_ALIGN_LOOP) {
| .align IR_LOOP_ALIGNMENT
}
|=>b:

i = bb->start;
Expand Down
16 changes: 14 additions & 2 deletions ir_cfg.c
Original file line number Diff line number Diff line change
Expand Up @@ -2359,7 +2359,19 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
#endif
}

/* 5. Group chains according to the most frequent edge between them */
/* 5. Align loop headers */
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
if (chains[b].head == b) {
bb = &ctx->cfg_blocks[b];
if (bb->loop_depth) {
if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
bb->flags |= IR_BB_ALIGN_LOOP;
}
}
}
}

/* 6. Group chains according to the most frequent edge between them */
// TODO: Try to find a better heuristic
for (e = edges, i = edges_count; i > 0; e++, i--) {
#if !IR_DEBUG_BB_SCHEDULE_GRAPH
Expand All @@ -2380,7 +2392,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
ir_dump_chains(ctx, chains);
#endif

/* 6. Form a final BB order */
/* 7. Form a final BB order */
count = 0;
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
if (chains[b].head == b) {
Expand Down
2 changes: 2 additions & 0 deletions ir_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,8 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use);
#define IR_BB_HAS_PARAM (1<<12)
#define IR_BB_HAS_VAR (1<<13)

/* The following flags are set by BB scheduler */
#define IR_BB_ALIGN_LOOP (1<<14)

struct _ir_block {
uint32_t flags;
Expand Down
93 changes: 67 additions & 26 deletions ir_x86.dasc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 16

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

Expand Down Expand Up @@ -3227,43 +3229,48 @@ static void ir_emit_prologue(ir_ctx *ctx)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
int offset = ctx->stack_frame_size + ctx->call_stack_size;

if (ctx->flags & IR_USE_FRAME_POINTER) {
| push Ra(IR_REG_RBP)
| mov Ra(IR_REG_RBP), Ra(IR_REG_RSP)
}
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
int i;
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);

for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
offset -= sizeof(void*);
| push Ra(i)
}
}
}
if (ctx->stack_frame_size + ctx->call_stack_size) {
if (ctx->fixed_stack_red_zone) {
IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
} else {
| sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size)
} else if (offset) {
| sub Ra(IR_REG_RSP), offset
}
}
if (ctx->used_preserved_regs) {
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
ir_reg fp;
int offset;
uint32_t i;
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
int i;
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP);

if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = 0;
offset -= ctx->stack_frame_size + ctx->call_stack_size;
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->stack_frame_size + ctx->call_stack_size;
}
for (i = 0; i < IR_REG_NUM; i++) {
for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
if (i < IR_REG_FP_FIRST) {
offset -= sizeof(void*);
| mov aword [Ra(fp)+offset], Ra(i)
offset -= sizeof(void*);
if (ctx->mflags & IR_X86_AVX) {
| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
} else {
offset -= sizeof(void*);
if (ctx->mflags & IR_X86_AVX) {
| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
} else {
| movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
}
| movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
}
}
}
Expand Down Expand Up @@ -3330,26 +3337,24 @@ static void ir_emit_epilogue(ir_ctx *ctx)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;

if (ctx->used_preserved_regs) {
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
int i;
int offset;
uint32_t i;
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = 0;
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->stack_frame_size + ctx->call_stack_size;
}
for (i = 0; i < IR_REG_NUM; i++) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
if (i < IR_REG_FP_FIRST) {
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

offset -= sizeof(void*);
| mov Ra(i), aword [Ra(fp)+offset]
} else {
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

offset -= sizeof(void*);
if (ctx->mflags & IR_X86_AVX) {
| vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
Expand All @@ -3361,7 +3366,40 @@ static void ir_emit_epilogue(ir_ctx *ctx)
}
}

if (ctx->flags & IR_USE_FRAME_POINTER) {
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
int i;
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
int offset;

if (ctx->flags & IR_USE_FRAME_POINTER) {
offset = 0;
} else {
offset = ctx->stack_frame_size + ctx->call_stack_size;
}
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
int i;
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);

for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
offset -= sizeof(void*);
}
}
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
| lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset]
} else if (offset) {
| add Ra(IR_REG_RSP), offset
}
for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
| pop Ra(i)
}
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
| pop Ra(IR_REG_RBP)
}
} else if (ctx->flags & IR_USE_FRAME_POINTER) {
| mov Ra(IR_REG_RSP), Ra(IR_REG_RBP)
| pop Ra(IR_REG_RBP)
} else if (ctx->stack_frame_size + ctx->call_stack_size) {
Expand Down Expand Up @@ -9742,6 +9780,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
continue;
}
if (bb->flags & IR_BB_ALIGN_LOOP) {
| .align IR_LOOP_ALIGNMENT
}
|=>b:

i = bb->start;
Expand Down
10 changes: 4 additions & 6 deletions tests/debug.Windows-x86_64/call2.irt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ Windows-x86_64
}
--EXPECT--
test:
subq $0x28, %rsp
movq %rbx, 0x20(%rsp)
pushq %rbx
subq $0x20, %rsp
movl %ecx, %ebx
movl %edx, %eax
movl %r8d, %edx
Expand All @@ -33,12 +33,10 @@ test:
leaq .L1(%rip), %rcx
callq printf
addl %ebx, %eax
movq 0x20(%rsp), %rbx
addq $0x28, %rsp
addq $0x20, %rsp
popq %rbx
retq
.rodata
.db 0x90, 0x90
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00


8 changes: 4 additions & 4 deletions tests/debug.Windows-x86_64/call3.irt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ Windows-x86_64
}
--EXPECT--
test:
subq $0x28, %rsp
movq %rbx, 0x20(%rsp)
pushq %rbx
subq $0x20, %rsp
movl %ecx, %ebx
movq 0x30(%rsp), %rax
movl %edx, %r10d
Expand All @@ -31,8 +31,8 @@ test:
leaq .L1(%rip), %rcx
callq *(%rax)
addl %ebx, %eax
movq 0x20(%rsp), %rbx
addq $0x28, %rsp
addq $0x20, %rsp
popq %rbx
retq
.rodata
.db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90
Expand Down
14 changes: 14 additions & 0 deletions tests/debug.Windows-x86_64/dessa_002.irt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ Windows-x86_64
--EXPECT--
test:
jmp .L2
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
.L1:
movl %eax, %ecx
.L2:
Expand Down
1 change: 1 addition & 0 deletions tests/debug.Windows-x86_64/dessa_003.irt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ test:
cltd
idivl %ecx
jmp .L2
nop
.L1:
xchgl %eax, %r9d
.L2:
Expand Down
29 changes: 19 additions & 10 deletions tests/debug.Windows-x86_64/fig.irt
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ Windows-x86_64
}
--EXPECT--
test:
subq $0x20, %rsp
movq %rbx, 0x18(%rsp)
movq %rbp, 0x10(%rsp)
movq %rsi, 8(%rsp)
movq %rdi, (%rsp)
pushq %rbx
pushq %rbp
pushq %rsi
pushq %rdi
movl 0x30(%rsp), %eax
movl 0x38(%rsp), %r10d
movl 0x40(%rsp), %r11d
Expand All @@ -80,6 +79,17 @@ test:
imull %edx, %esi
leal 4(%rsi), %edi
jmp .L3
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
.L1:
movl %ecx, %r10d
imull %edx, %r10d
Expand All @@ -105,9 +115,8 @@ test:
addl %r10d, %eax
addl %r11d, %eax
leal 1(%rax, %rbx), %eax
movq 0x18(%rsp), %rbx
movq 0x10(%rsp), %rbp
movq 8(%rsp), %rsi
movq (%rsp), %rdi
addq $0x20, %rsp
popq %rdi
popq %rsi
popq %rbp
popq %rbx
retq
10 changes: 10 additions & 0 deletions tests/debug.Windows-x86_64/loop_002.irt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ test:
xorl %ecx, %ecx
xorl %eax, %eax
jmp .L4
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
.L1:
cmpl %ecx, %edx
je .L2
Expand Down
8 changes: 8 additions & 0 deletions tests/debug.Windows-x86_64/regset-fib.irt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ main:
movsd %xmm0, 0x20(%rsp)
xorpd %xmm0, %xmm0
jmp .L2
nop
nop
nop
nop
nop
nop
nop
nop
.L1:
addsd %xmm0, %xmm1
movsd %xmm1, 0x20(%rsp)
Expand Down
8 changes: 8 additions & 0 deletions tests/debug.Windows-x86_64/regset-fib2.irt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ main:
movsd %xmm0, 0x20(%rsp)
xorpd %xmm0, %xmm0
jmp .L2
nop
nop
nop
nop
nop
nop
nop
nop
.L1:
addsd %xmm0, %xmm1
movsd %xmm1, 0x20(%rsp)
Expand Down
Loading
Loading