Skip to content

Commit

Permalink
Add fuse instruction
Browse files Browse the repository at this point in the history
To improve execution efficiency, we apply instruction fusion: instruction
sequences that match specific patterns are combined into a single fused
instruction. Currently, four fused instructions are implemented: auipc + addi,
auipc + add, a run of consecutive sw, and a run of consecutive lw.
  • Loading branch information
qwe661234 committed May 22, 2023
1 parent 1c11b39 commit 3ae3059
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 13 deletions.
15 changes: 14 additions & 1 deletion src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,12 @@
_(cjalr, 1) \
_(cadd, 0) \
_(cswsp, 0) \
)
) \
_(fuse1, 0) \
_(fuse2, 0) \
_(fuse3, 0) \
_(fuse4, 0) \
_(empty, 0)
/* clang-format on */

/* IR list */
Expand Down Expand Up @@ -228,6 +233,11 @@ enum {
INSN_32 = 4,
};

/* Operands of one memory access that has been folded into a fused sw/lw
 * instruction. One entry is recorded per original instruction.
 */
typedef struct mem_fuse {
    int32_t imm; /* immediate offset added to the base register */
    uint8_t rd;  /* destination register index (written by fused lw) */
    uint8_t rs1; /* base address register index */
    uint8_t rs2; /* source register index (stored by fused sw) */
} mem_fuse_t;

typedef struct rv_insn {
union {
int32_t imm;
Expand All @@ -240,6 +250,9 @@ typedef struct rv_insn {
#if RV32_HAS(EXT_C)
uint8_t shamt;
#endif
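/* fused-instruction operands: imm2 holds the second immediate (auipc + addi)
 * or the number of fused sw/lw; mem_fuse points to the per-access operands
 */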
int32_t imm2;
mem_fuse_t *mem_fuse;

/* instruction length */
uint8_t insn_len;
Expand Down
155 changes: 143 additions & 12 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,18 +287,18 @@ enum {
#define RVOP_RUN_NEXT (!ir->tailcall)
#endif

#define RVOP(inst, code) \
static bool do_##inst(riscv_t *rv UNUSED, const rv_insn_t *ir UNUSED) \
{ \
rv->X[rv_reg_zero] = 0; \
code; \
rv->csr_cycle++; \
nextop: \
rv->PC += ir->insn_len; \
if (!RVOP_RUN_NEXT) \
return true; \
const rv_insn_t *next = ir + 1; \
MUST_TAIL return next->impl(rv, next); \
/* RVOP(inst, code): define the handler do_<inst>(rv, ir) for one instruction.
 *
 * The generated function clears X[rv_reg_zero], increments csr_cycle, runs
 * `code`, and then advances PC by ir->insn_len. When RVOP_RUN_NEXT is false it
 * returns true; otherwise it tail-calls the impl of the following IR entry
 * (ir + 1). The `nextop` label sits immediately before the PC update.
 * NOTE(review): presumably `code` may `goto nextop` to skip the rest of its
 * body - confirm against the individual handlers.
 */
#define RVOP(inst, code) \
static bool do_##inst(riscv_t *rv, const rv_insn_t *ir) \
{ \
rv->X[rv_reg_zero] = 0; \
rv->csr_cycle++; \
code; \
nextop: \
rv->PC += ir->insn_len; \
if (!RVOP_RUN_NEXT) \
return true; \
const rv_insn_t *next = ir + 1; \
MUST_TAIL return next->impl(rv, next); \
}

/* RV32I Base Instruction Set */
Expand Down Expand Up @@ -1277,6 +1277,48 @@ RVOP(cswsp, {
})
#endif

/* Fused auipc + addi: rd = PC + imm + imm2, where imm is the auipc immediate
 * and imm2 is the immediate taken from the absorbed addi.
 */
RVOP(fuse1, {
    const int32_t fused_value = (int32_t) (rv->PC + ir->imm + ir->imm2);
    rv->X[ir->rd] = fused_value;
    /* one extra step for the absorbed instruction; RVOP adds the other */
    rv->PC += ir->insn_len;
})

/* Fused auipc + add: rd = X[rs1] + (PC + imm), where imm is the auipc
 * immediate and rs1 is the remaining source register of the absorbed add.
 */
RVOP(fuse2, {
    /* Sum as unsigned 32-bit values so the result wraps modulo 2^32; adding
     * two int32_t operands would be undefined behaviour on overflow.
     */
    rv->X[ir->rd] = (uint32_t) rv->X[ir->rs1] + (uint32_t) (rv->PC + ir->imm);
    /* one extra step for the absorbed instruction; RVOP adds the other */
    rv->PC += ir->insn_len;
})

/* Fused run of sw: performs the ir->imm2 word stores described by the
 * ir->mem_fuse array, in order.
 */
RVOP(fuse3, {
    const mem_fuse_t *ops = ir->mem_fuse;
    for (int k = 0; k < ir->imm2; k++) {
        /* effective address = base register + immediate offset */
        const uint32_t addr = rv->X[ops[k].rs1] + ops[k].imm;
        RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
        rv->io.mem_write_w(rv, addr, rv->X[ops[k].rs2]);
    }
    /* step over the absorbed stores; RVOP adds the final insn_len */
    rv->PC += (ir->imm2 - 1) * ir->insn_len;
})

/* Fused run of lw: performs the ir->imm2 word loads described by the
 * ir->mem_fuse array, in order.
 */
RVOP(fuse4, {
    const mem_fuse_t *ops = ir->mem_fuse;
    for (int k = 0; k < ir->imm2; k++) {
        /* effective address = base register + immediate offset */
        const uint32_t addr = rv->X[ops[k].rs1] + ops[k].imm;
        RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
        rv->X[ops[k].rd] = rv->io.mem_read_w(rv, addr);
    }
    /* step over the absorbed loads; RVOP adds the final insn_len */
    rv->PC += (ir->imm2 - 1) * ir->insn_len;
})

/* Handler for the placeholder left where an instruction was absorbed into a
 * preceding fused instruction. It leaves PC untouched, counts one cycle,
 * re-zeroes X[rv_reg_zero], and always continues with the next IR entry.
 */
static bool do_empty(riscv_t *rv, const rv_insn_t *ir)
{
    const rv_insn_t *const successor = &ir[1];

    rv->csr_cycle++;
    rv->X[rv_reg_zero] = 0;
    MUST_TAIL return successor->impl(rv, successor);
}

static const void *dispatch_table[] = {
#define _(inst, can_branch) [rv_insn_##inst] = do_##inst,
RISCV_INSN_LIST
Expand Down Expand Up @@ -1407,6 +1449,92 @@ static void extend_block(riscv_t *rv, block_t *block)
last_ir->branch_untaken = next->ir;
}

static void match_pattern(block_t *block)
{
for (uint32_t i = 0; i < block->n_insn - 1; i++) {
rv_insn_t *ir = block->ir + i, *next_ir = NULL;
int32_t count = 0;
switch (ir->opcode) {
case rv_insn_auipc:
next_ir = ir + 1;
if (next_ir->opcode == rv_insn_addi) {
if (ir->rd == next_ir->rs1) {
ir->opcode = rv_insn_fuse1;
ir->rd = next_ir->rd;
ir->imm2 = next_ir->imm;
ir->impl = dispatch_table[ir->opcode];
next_ir->opcode = rv_insn_empty;
next_ir->impl = dispatch_table[next_ir->opcode];
} else if (ir->rd == next_ir->rs2) {
ir->opcode = rv_insn_fuse2;
ir->rd = next_ir->rd;
ir->rs1 = next_ir->rs1;
ir->impl = dispatch_table[ir->opcode];
next_ir->opcode = rv_insn_empty;
next_ir->impl = dispatch_table[next_ir->opcode];
}
}
break;
case rv_insn_sw:
count = 1;
for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) {
next_ir = ir + j;
if (next_ir->opcode != rv_insn_sw)
break;
count++;
}
if (count >= 5) {
ir->opcode = rv_insn_fuse3;
ir->mem_fuse = malloc(count * sizeof(mem_fuse_t));
ir->imm2 = count;
ir->mem_fuse[0].imm = ir->imm;
ir->mem_fuse[0].rd = ir->rd;
ir->mem_fuse[0].rs1 = ir->rs1;
ir->mem_fuse[0].rs2 = ir->rs2;
ir->impl = dispatch_table[ir->opcode];
for (int j = 1; j < count; j++) {
next_ir = ir + j;
ir->mem_fuse[j].imm = next_ir->imm;
ir->mem_fuse[j].rd = next_ir->rd;
ir->mem_fuse[j].rs1 = next_ir->rs1;
ir->mem_fuse[j].rs2 = next_ir->rs2;
next_ir->opcode = rv_insn_empty;
next_ir->impl = dispatch_table[next_ir->opcode];
}
}
break;
case rv_insn_lw:
count = 1;
for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) {
next_ir = ir + j;
if (next_ir->opcode != rv_insn_lw)
break;
count++;
}
if (count >= 5) {
ir->opcode = rv_insn_fuse4;
ir->mem_fuse = malloc(count * sizeof(mem_fuse_t));
ir->imm2 = count;
ir->mem_fuse[0].imm = ir->imm;
ir->mem_fuse[0].rd = ir->rd;
ir->mem_fuse[0].rs1 = ir->rs1;
ir->mem_fuse[0].rs2 = ir->rs2;
ir->impl = dispatch_table[ir->opcode];
for (int j = 1; j < count; j++) {
next_ir = ir + j;
ir->mem_fuse[j].imm = next_ir->imm;
ir->mem_fuse[j].rd = next_ir->rd;
ir->mem_fuse[j].rs1 = next_ir->rs1;
ir->mem_fuse[j].rs2 = next_ir->rs2;
next_ir->opcode = rv_insn_empty;
next_ir->impl = dispatch_table[next_ir->opcode];
}
}
break;
}
}
}

static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
{
block_map_t *map = &rv->block_map;
Expand All @@ -1425,6 +1553,9 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
/* translate the basic block */
block_translate(rv, next);

/* fuse instruction */
match_pattern(next);

/* insert the block into block map */
block_insert(&rv->block_map, next);

Expand Down

0 comments on commit 3ae3059

Please sign in to comment.