Skip to content

Commit

Permalink
Merge pull request #132 from qwe661234/Add_fuse_operation
Browse files Browse the repository at this point in the history
Introduce preliminary macro operation fusion
  • Loading branch information
jserv authored May 29, 2023
2 parents 7bde175 + 18213bc commit 5fb9d8b
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 3 deletions.
17 changes: 16 additions & 1 deletion src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,14 @@
_(cjalr, 1) \
_(cadd, 0) \
_(cswsp, 0) \
)
) \
/* macro operation fusion: convert specific RISC-V instruction patterns
* into faster and equivalent code
*/ \
_(fuse1, 0) \
_(fuse2, 0) \
_(fuse3, 0) \
_(fuse4, 0)
/* clang-format on */

/* IR list */
Expand Down Expand Up @@ -228,6 +235,11 @@ enum {
INSN_32 = 4,
};

typedef struct {
int32_t imm;
uint8_t rd, rs1, rs2;
} opcode_fuse_t;

typedef struct rv_insn {
union {
int32_t imm;
Expand All @@ -240,6 +252,9 @@ typedef struct rv_insn {
#if RV32_HAS(EXT_C)
uint8_t shamt;
#endif
/* fuse operation */
int16_t imm2;
opcode_fuse_t *fuse;

/* instruction length */
uint8_t insn_len;
Expand Down
137 changes: 135 additions & 2 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ extern struct target_ops gdbstub_ops;
#include "decode.h"
#include "riscv.h"
#include "riscv_private.h"
#include "state.h"
#include "utils.h"

/* RISC-V exception code list */
Expand Down Expand Up @@ -310,7 +311,15 @@ static uint32_t last_pc = 0;
/* RV32I Base Instruction Set */

/* Internal */
RVOP(nop, {/* no operation */});
static bool do_nop(riscv_t *rv, const rv_insn_t *ir)
{
rv->X[rv_reg_zero] = 0;
rv->csr_cycle++;
rv->PC += ir->insn_len;
const rv_insn_t *next = ir + 1;
MUST_TAIL return next->impl(rv, next);
}


/* LUI is used to build 32-bit constants and uses the U-type format. LUI
* places the U-immediate value in the top 20 bits of the destination
Expand Down Expand Up @@ -1219,6 +1228,46 @@ RVOP(cswsp, {
})
#endif

/* auipc + addi */
RVOP(fuse1, { rv->X[ir->rd] = (int32_t) (rv->PC + ir->imm + ir->imm2); })

/* auipc + add */
RVOP(fuse2, {
rv->X[ir->rd] = (int32_t) (rv->X[ir->rs1]) + (int32_t) (rv->PC + ir->imm);
})

/* multiple sw */
RVOP(fuse3, {
opcode_fuse_t *fuse = ir->fuse;
uint32_t addr = rv->X[fuse[0].rs1] + fuse[0].imm;
/* the memory addresses of the sw instructions are contiguous, so we only
* need to check the first sw instruction to determine if its memory address
* is misaligned or if the memory chunk does not exist.
*/
RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
rv->io.mem_write_w(rv, addr, rv->X[fuse[0].rs2]);
for (int i = 1; i < ir->imm2; i++) {
addr = rv->X[fuse[i].rs1] + fuse[i].imm;
rv->io.mem_write_w(rv, addr, rv->X[fuse[i].rs2]);
}
})

/* multiple lw */
RVOP(fuse4, {
opcode_fuse_t *fuse = ir->fuse;
uint32_t addr = rv->X[fuse[0].rs1] + fuse[0].imm;
/* the memory addresses of the lw instructions are contiguous, so we only
* need to check the first lw instruction to determine if its memory address
* is misaligned or if the memory chunk does not exist.
*/
RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
rv->X[fuse[0].rd] = rv->io.mem_read_w(rv, addr);
for (int i = 1; i < ir->imm2; i++) {
addr = rv->X[fuse[i].rs1] + fuse[i].imm;
rv->X[fuse[i].rd] = rv->io.mem_read_w(rv, addr);
}
})

static const void *dispatch_table[] = {
#define _(inst, can_branch) [rv_insn_##inst] = do_##inst,
RISCV_INSN_LIST
Expand Down Expand Up @@ -1337,7 +1386,6 @@ static void block_translate(riscv_t *rv, block_t *block)
/* compute the end of pc */
block->pc_end += ir->insn_len;
block->n_insn++;

/* stop on branch */
if (insn_is_branch(ir->opcode)) {
/* recursive jump translation */
Expand All @@ -1356,6 +1404,85 @@ static void block_translate(riscv_t *rv, block_t *block)
block->ir[block->n_insn - 1].tailcall = true;
}

#define COMBINE_MEM_OPS(RW) \
count = 1; \
next_ir = ir + 1; \
if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw)) \
break; \
sign = (ir->imm - next_ir->imm) >> 31 ? -1 : 1; \
for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) { \
next_ir = ir + j; \
if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw) || \
ir->rs1 != next_ir->rs1 || ir->imm - next_ir->imm != 4 * sign) \
break; \
count++; \
} \
if (count > 1) { \
ir->opcode = IIF(RW)(rv_insn_fuse4, rv_insn_fuse3); \
ir->fuse = malloc(count * sizeof(opcode_fuse_t)); \
ir->imm2 = count; \
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t)); \
ir->impl = dispatch_table[ir->opcode]; \
for (int j = 1; j < count; j++) { \
next_ir = ir + j; \
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
next_ir->opcode = rv_insn_nop; \
next_ir->impl = dispatch_table[next_ir->opcode]; \
} \
}


/* examine whether instructions in a block match a specific pattern. If so,
* rewrite them into fused instructions.
*
* We plan to devise strategies to increase the number of instructions that
* match the pattern, such as reordering the instructions.
*/
static void match_pattern(block_t *block)
{
for (uint32_t i = 0; i < block->n_insn - 1; i++) {
rv_insn_t *ir = block->ir + i, *next_ir = NULL;
int32_t count = 0, sign = 1;
switch (ir->opcode) {
case rv_insn_auipc:
next_ir = ir + 1;
if (next_ir->opcode == rv_insn_addi && ir->rd == next_ir->rs1) {
/* the destination register of instruction auipc is equal to the
* source register 1 of next instruction addi */
ir->opcode = rv_insn_fuse1;
ir->rd = next_ir->rd;
ir->imm2 = next_ir->imm;
ir->impl = dispatch_table[ir->opcode];
next_ir->opcode = rv_insn_nop;
next_ir->impl = dispatch_table[next_ir->opcode];
} else if (next_ir->opcode == rv_insn_add &&
ir->rd == next_ir->rs2) {
/* the destination register of instruction auipc is equal to the
* source register 2 of next instruction add */
ir->opcode = rv_insn_fuse2;
ir->rd = next_ir->rd;
ir->rs1 = next_ir->rs1;
ir->impl = dispatch_table[ir->opcode];
next_ir->opcode = rv_insn_nop;
next_ir->impl = dispatch_table[next_ir->opcode];
}
break;
/* If the memory addresses of a sequence of store or load instructions
* are contiguous, combine these instructions.
*/
case rv_insn_sw:
COMBINE_MEM_OPS(0);
break;
case rv_insn_lw:
COMBINE_MEM_OPS(1);
break;
/* FIXME: lui + addi */
/* TODO: mixture of sw and lw */
/* TODO: reorder insturction to match pattern */
}
}
}

static block_t *prev = NULL;
static block_t *block_find_or_translate(riscv_t *rv)
{
Expand All @@ -1374,6 +1501,12 @@ static block_t *block_find_or_translate(riscv_t *rv)

/* translate the basic block */
block_translate(rv, next);
#if RV32_HAS(GDBSTUB)
if (!rv->debug_mode)
#endif
/* macro operation fusion */
match_pattern(next);


/* insert the block into block map */
block_insert(&rv->block_map, next);
Expand Down
2 changes: 2 additions & 0 deletions src/riscv.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ void block_map_clear(block_map_t *map)
block_t *block = map->map[i];
if (!block)
continue;
for (uint32_t i = 0; i < block->n_insn; i++)
free(block->ir[i].fuse);
free(block->ir);
free(block);
map->map[i] = NULL;
Expand Down

0 comments on commit 5fb9d8b

Please sign in to comment.