diff --git a/src/const_opt.c b/src/const_opt.c
new file mode 100644
index 000000000..4c7878ba5
--- /dev/null
+++ b/src/const_opt.c
@@ -0,0 +1,1006 @@
+/* RV32I Base Instruction Set */
+
+/* Every CONSTOPT(name, body) entry is the constant-propagation step for one
+ * opcode. The bodies read and update two per-register tables reached through
+ * const_opt_info: is_constant[] (is the register value known?) and
+ * const_val[] (the known value).
+ */
+
+/* Internal */
+CONSTOPT(nop, {})
+
+/* LUI is used to build 32-bit constants and uses the U-type format. LUI
+ * places the U-immediate value in the top 20 bits of the destination
+ * register rd, filling in the lowest 12 bits with zeros. The 32-bit
+ * result is sign-extended to 64 bits.
+ */
+CONSTOPT(lui, {
+    /* x0 is hardwired to zero: never record another value for it. This is
+     * the same rd check that JAL and JALR perform.
+     */
+    if (ir->rd) {
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+    }
+})
+
+/* AUIPC is used to build pc-relative addresses and uses the U-type format.
+ * AUIPC forms a 32-bit offset from the 20-bit U-immediate, filling in the
+ * lowest 12 bits with zeros, adds this offset to the address of the AUIPC
+ * instruction, then places the result in register rd.
+ */
+CONSTOPT(auipc, {
+    /* ir->pc is the address of this instruction, so the sum is already
+     * known. Skip x0 for the same reason as LUI.
+     */
+    if (ir->rd) {
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm + ir->pc;
+    }
+})
+
+/* JAL: Jump and Link
+ * store successor instruction address into rd.
+ * add next J imm (offset) to pc.
+ */
+CONSTOPT(jal, {
+    /* The link value is the address of the following instruction. */
+    if (ir->rd) {
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->pc + ir->insn_len;
+    }
+})
+
+/* The indirect jump instruction JALR uses the I-type encoding. The target
+ * address is obtained by adding the sign-extended 12-bit I-immediate to the
+ * register rs1, then setting the least-significant bit of the result to zero.
+ * The address of the instruction following the jump (pc+4) is written to
+ * register rd. Register x0 can be used as the destination if the result is
+ * not required.
+ */
+CONSTOPT(jalr, {
+    /* Only the link value is known here; it is the next instruction. */
+    if (ir->rd) {
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->pc + ir->insn_len;
+    }
+})
+
+/* Resolve a conditional branch whose two operands are both known.
+ *
+ * 'type' is the type both operands are compared as, and 'cond' is the
+ * comparison under which the branch FALLS THROUGH (the negation of the
+ * branch condition). When it holds, imm is replaced by the instruction
+ * length so the jump lands on the next instruction; otherwise imm keeps the
+ * branch offset. In both cases the instruction is rewritten as JAL.
+ *
+ * A branch never links, so rd is forced to x0 for the rewritten JAL.
+ * NOTE(review): this is a no-op if the decoder already zeroes rd for
+ * B-type instructions -- confirm.
+ *
+ * The body is wrapped in do { } while (0) so the macro expands to a single
+ * statement wherever it is used.
+ */
+/* clang-format off */
+#define OPT_BRANCH_FUNC(type, cond)                                   \
+    do {                                                              \
+        if (const_opt_info->is_constant[ir->rs1] &&                   \
+            const_opt_info->is_constant[ir->rs2]) {                   \
+            if ((type) const_opt_info->const_val[ir->rs1] cond (type) \
+                const_opt_info->const_val[ir->rs2])                   \
+                ir->imm = ir->insn_len;                               \
+            ir->rd = rv_reg_zero;                                     \
+            ir->opcode = rv_insn_jal;                                 \
+            ir->impl = dispatch_table[ir->opcode];                    \
+        }                                                             \
+    } while (0)
+/* clang-format on */
+
+/* BEQ: Branch if Equal */
+CONSTOPT(beq, { OPT_BRANCH_FUNC(uint32_t, !=); })
+
+/* BNE: Branch if Not Equal */
+CONSTOPT(bne, { OPT_BRANCH_FUNC(uint32_t, ==); })
+
+/* BLT: Branch if Less Than */
+CONSTOPT(blt, { OPT_BRANCH_FUNC(int32_t, >=); })
+
+/* BGE: Branch if Greater Than or Equal */
+CONSTOPT(bge, { OPT_BRANCH_FUNC(int32_t, <); })
+
+/* BLTU: Branch if Less Than Unsigned */
+CONSTOPT(bltu, { OPT_BRANCH_FUNC(uint32_t, >=); })
+
+/* BGEU: Branch if Greater Than or Equal Unsigned */
+CONSTOPT(bgeu, { OPT_BRANCH_FUNC(uint32_t, <); })
+
+/* Loads: the loaded value is not tracked, so rd stops being a constant. */
+
+/* LB: Load Byte */
+CONSTOPT(lb, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* LH: Load Halfword */
+CONSTOPT(lh, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* LW: Load Word */
+CONSTOPT(lw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* LBU: Load Byte Unsigned */
+CONSTOPT(lbu, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* LHU: Load Halfword Unsigned */
+CONSTOPT(lhu, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* Stores write no register, so there is nothing to update. */
+
+/* SB: Store Byte */
+CONSTOPT(sb, {})
+
+/* SH: Store Halfword */
+CONSTOPT(sh, {})
+
+/* SW: Store Word */
+CONSTOPT(sw, {})
+
+/* ADDI adds the sign-extended 12-bit immediate to register rs1. Arithmetic
+ * overflow is ignored and the result is simply the low XLEN bits of the
+ * result. ADDI rd, rs1, 0 is used to implement the MV rd, rs1 assembler
+ * pseudo-instruction.
+ */
+CONSTOPT(addi, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        /* Unknown source: the destination becomes unknown as well. */
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        /* Fold rs1 + imm and rewrite the instruction as LUI carrying the
+         * folded value in imm.
+         */
+        ir->imm = ir->imm + const_opt_info->const_val[ir->rs1];
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* SLTI places the value 1 in register rd if register rs1 is less than the
+ * sign-extended immediate when both are treated as signed numbers, else 0
+ * is written to rd.
+ */
+CONSTOPT(slti, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        const int32_t lhs = (int32_t) const_opt_info->const_val[ir->rs1];
+        ir->imm = (lhs < ir->imm) ? 1 : 0;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* SLTIU places the value 1 in register rd if register rs1 is less than the
+ * immediate when both are treated as unsigned numbers, else 0 is written to
+ * rd.
+ */
+CONSTOPT(sltiu, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        const uint32_t rhs = (uint32_t) ir->imm;
+        ir->imm = (const_opt_info->const_val[ir->rs1] < rhs) ? 1 : 0;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* XORI: Exclusive OR Immediate */
+CONSTOPT(xori, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        ir->imm = ir->imm ^ const_opt_info->const_val[ir->rs1];
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* ORI: OR Immediate */
+CONSTOPT(ori, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        ir->imm = ir->imm | const_opt_info->const_val[ir->rs1];
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* ANDI performs bitwise AND on register rs1 and the sign-extended 12-bit
+ * immediate and places the result in rd.
+ */
+CONSTOPT(andi, {
+    if (!const_opt_info->is_constant[ir->rs1]) {
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        ir->imm = ir->imm & const_opt_info->const_val[ir->rs1];
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* SLLI performs logical left shift on the value in register rs1 by the shift
+ * amount held in the lower 5 bits of the immediate.
+ */
+CONSTOPT(slli, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] << (ir->imm & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SRLI performs logical right shift on the value in register rs1 by the shift
+ * amount held in the lower 5 bits of the immediate.
+ */
+CONSTOPT(srli, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] >> (ir->imm & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SRAI performs arithmetic right shift on the value in register rs1 by the
+ * shift amount held in the lower 5 bits of the immediate.
+ */
+CONSTOPT(srai, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        ir->imm =
+            (int32_t) const_opt_info->const_val[ir->rs1] >> (ir->imm & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* Register-register ALU operations: when both sources are known, the result
+ * is computed here and the instruction is rewritten as LUI carrying the
+ * folded value in imm; otherwise rd stops being a constant.
+ */
+
+/* ADD */
+CONSTOPT(add, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* Add as unsigned: wraps modulo 2^32 like the hardware and avoids
+         * signed-overflow undefined behaviour.
+         */
+        ir->imm = (uint32_t) const_opt_info->const_val[ir->rs1] +
+                  (uint32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SUB: Subtract */
+CONSTOPT(sub, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* Unsigned subtraction wraps modulo 2^32 (no signed overflow). */
+        ir->imm = (uint32_t) const_opt_info->const_val[ir->rs1] -
+                  (uint32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SLL: Shift Left Logical */
+CONSTOPT(sll, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1]
+                  << (const_opt_info->const_val[ir->rs2] & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SLT: Set on Less Than */
+CONSTOPT(slt, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = (int32_t) const_opt_info->const_val[ir->rs1] <
+                          (int32_t) const_opt_info->const_val[ir->rs2]
+                      ? 1
+                      : 0;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SLTU: Set on Less Than Unsigned */
+CONSTOPT(sltu, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] <
+                          const_opt_info->const_val[ir->rs2]
+                      ? 1
+                      : 0;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* XOR: Exclusive OR */
+CONSTOPT(xor, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = (int32_t) const_opt_info->const_val[ir->rs1] ^
+                  (int32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SRL: Shift Right Logical */
+CONSTOPT(srl, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] >>
+                  (const_opt_info->const_val[ir->rs2] & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* SRA: Shift Right Arithmetic */
+CONSTOPT(sra, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = (int32_t) const_opt_info->const_val[ir->rs1] >>
+                  (const_opt_info->const_val[ir->rs2] & 0x1f);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* OR */
+CONSTOPT(or, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = (int32_t) const_opt_info->const_val[ir->rs1] |
+                  (int32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* AND */
+CONSTOPT(and, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = (int32_t) const_opt_info->const_val[ir->rs1] &
+                  (int32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* The system instructions below leave the constant tables untouched. */
+
+/* ECALL: Environment Call */
+CONSTOPT(ecall, {})
+
+/* EBREAK: Environment Break */
+CONSTOPT(ebreak, {})
+
+/* WFI: Wait for Interrupt */
+CONSTOPT(wfi, {})
+
+/* URET: return from traps in U-mode */
+CONSTOPT(uret, {})
+
+/* SRET: return from traps in S-mode */
+CONSTOPT(sret, {})
+
+/* HRET: return from traps in H-mode */
+CONSTOPT(hret, {})
+
+/* MRET: return from traps in M-mode */
+CONSTOPT(mret, {})
+
+#if RV32_HAS(Zifencei) /* RV32 Zifencei Standard Extension */
+CONSTOPT(fencei, {})
+#endif
+
+#if RV32_HAS(Zicsr) /* RV32 Zicsr Standard Extension */
+/* CSR reads are not tracked, so rd stops being a constant. */
+
+/* CSRRW: Atomic Read/Write CSR */
+CONSTOPT(csrrw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* CSRRS: Atomic Read and Set Bits in CSR */
+CONSTOPT(csrrs, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* CSRRC: Atomic Read and Clear Bits in CSR */
+CONSTOPT(csrrc, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* CSRRWI */
+CONSTOPT(csrrwi, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* CSRRSI */
+CONSTOPT(csrrsi, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* CSRRCI */
+CONSTOPT(csrrci, { const_opt_info->is_constant[ir->rd] = false; })
+#endif
+
+/* RV32M Standard Extension */
+
+#if RV32_HAS(EXT_M)
+/* MUL: Multiply */
+CONSTOPT(mul, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* The low 32 bits of the product are the same for signed and
+         * unsigned operands; multiplying as unsigned avoids signed overflow.
+         */
+        ir->imm = (uint32_t) const_opt_info->const_val[ir->rs1] *
+                  (uint32_t) const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* MULH: Multiply High Signed Signed */
+CONSTOPT(mulh, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* Both operands are signed: cast through int32_t so they are
+         * sign-extended (not zero-extended) to 64 bits.
+         */
+        const int64_t a = (int32_t) const_opt_info->const_val[ir->rs1];
+        const int64_t b = (int32_t) const_opt_info->const_val[ir->rs2];
+        ir->imm = ((uint64_t) (a * b)) >> 32;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* MULHSU: Multiply High Signed Unsigned */
+CONSTOPT(mulhsu, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* rs1 is signed (sign-extended), rs2 is unsigned (zero-extended). */
+        const int64_t a = (int32_t) const_opt_info->const_val[ir->rs1];
+        const int64_t b = (uint32_t) const_opt_info->const_val[ir->rs2];
+        ir->imm = ((uint64_t) (a * b)) >> 32;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* MULHU: Multiply High Unsigned Unsigned */
+CONSTOPT(mulhu, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        /* The full product needs all 64 bits, so it must be computed as
+         * uint64_t; an int64_t product can overflow.
+         */
+        const uint64_t a = (uint32_t) const_opt_info->const_val[ir->rs1];
+        const uint64_t b = (uint32_t) const_opt_info->const_val[ir->rs2];
+        ir->imm = (uint32_t) ((a * b) >> 32);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* DIV: Divide Signed */
+/* +------------------------+-----------+----------+-----------+
+ * | Condition              |  Dividend |  Divisor |   DIV[W]  |
+ * +------------------------+-----------+----------+-----------+
+ * | Division by zero       |     x     |     0    |    −1     |
+ * | Overflow (signed only) |  −2^{L−1} |    −1    | −2^{L−1}  |
+ * +------------------------+-----------+----------+-----------+
+ */
+CONSTOPT(div, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        const int32_t dividend = (int32_t) const_opt_info->const_val[ir->rs1];
+        const int32_t divisor = (int32_t) const_opt_info->const_val[ir->rs2];
+        if (!divisor)
+            ir->imm = ~0U; /* division by zero yields all ones */
+        else if (divisor == -1 &&
+                 const_opt_info->const_val[ir->rs1] == 0x80000000U)
+            ir->imm = const_opt_info->const_val[ir->rs1]; /* overflow */
+        else
+            ir->imm = (unsigned int) (dividend / divisor);
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* DIVU: Divide Unsigned */
+/* +------------------------+-----------+----------+----------+
+ * | Condition              |  Dividend |  Divisor |  DIVU[W] |
+ * +------------------------+-----------+----------+----------+
+ * | Division by zero       |     x     |     0    |  2^L − 1 |
+ * +------------------------+-----------+----------+----------+
+ */
+CONSTOPT(divu, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        const uint32_t dividend = const_opt_info->const_val[ir->rs1];
+        const uint32_t divisor = const_opt_info->const_val[ir->rs2];
+        ir->imm = !divisor ? ~0U : dividend / divisor;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* REM: Remainder Signed */
+/* +------------------------+-----------+----------+---------+
+ * | Condition              |  Dividend |  Divisor |  REM[W] |
+ * +------------------------+-----------+----------+---------+
+ * | Division by zero       |     x     |     0    |    x    |
+ * | Overflow (signed only) |  −2^{L−1} |    −1    |    0    |
+ * +------------------------+-----------+----------+---------+
+ */
+CONSTOPT(rem, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        const int32_t dividend = const_opt_info->const_val[ir->rs1];
+        const int32_t divisor = const_opt_info->const_val[ir->rs2];
+        if (!divisor)
+            ir->imm = dividend; /* division by zero yields the dividend */
+        else if (divisor == -1 &&
+                 const_opt_info->const_val[ir->rs1] == 0x80000000U)
+            ir->imm = 0; /* overflow */
+        else
+            ir->imm = dividend % divisor;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* REMU: Remainder Unsigned */
+/* +------------------------+-----------+----------+----------+
+ * | Condition              |  Dividend |  Divisor |  REMU[W] |
+ * +------------------------+-----------+----------+----------+
+ * | Division by zero       |     x     |     0    |     x    |
+ * +------------------------+-----------+----------+----------+
+ */
+CONSTOPT(remu, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        const uint32_t dividend = const_opt_info->const_val[ir->rs1];
+        const uint32_t divisor = const_opt_info->const_val[ir->rs2];
+        ir->imm = !divisor ? dividend : dividend % divisor;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_lui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+#endif
+
+/* RV32A Standard Extension */
+/* TODO: support constant optimization for A and F extension */
+#if RV32_HAS(EXT_A)
+/* Every instruction of the A extension writes integer register rd (the
+ * loaded value, or the success code for SC.W). Nothing is folded yet, but rd
+ * must still be marked non-constant, exactly as the plain loads do;
+ * otherwise a value recorded earlier for rd would be used after it has been
+ * overwritten.
+ */
+
+/* LR.W: Load Reserved */
+CONSTOPT(lrw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* SC.W: Store Conditional */
+CONSTOPT(scw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOSWAP.W: Atomic Swap */
+CONSTOPT(amoswapw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOADD.W: Atomic ADD */
+CONSTOPT(amoaddw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOXOR.W: Atomic XOR */
+CONSTOPT(amoxorw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOAND.W: Atomic AND */
+CONSTOPT(amoandw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOOR.W: Atomic OR */
+CONSTOPT(amoorw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOMIN.W: Atomic MIN */
+CONSTOPT(amominw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOMAX.W: Atomic MAX */
+CONSTOPT(amomaxw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOMINU.W */
+CONSTOPT(amominuw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* AMOMAXU.W */
+CONSTOPT(amomaxuw, { const_opt_info->is_constant[ir->rd] = false; })
+#endif /* RV32_HAS(EXT_A) */
+
+/* RV32F Standard Extension */
+
+#if RV32_HAS(EXT_F)
+/* The instructions below either store to memory or write a floating-point
+ * register, so the integer constant tables are left untouched.
+ */
+
+/* FLW */
+CONSTOPT(flw, {})
+
+/* FSW */
+CONSTOPT(fsw, {})
+
+/* FMADD.S */
+CONSTOPT(fmadds, {})
+
+/* FMSUB.S */
+CONSTOPT(fmsubs, {})
+
+/* FNMSUB.S */
+CONSTOPT(fnmsubs, {})
+
+/* FNMADD.S */
+CONSTOPT(fnmadds, {})
+
+/* FADD.S */
+CONSTOPT(fadds, {})
+
+/* FSUB.S */
+CONSTOPT(fsubs, {})
+
+/* FMUL.S */
+CONSTOPT(fmuls, {})
+
+/* FDIV.S */
+CONSTOPT(fdivs, {})
+
+/* FSQRT.S */
+CONSTOPT(fsqrts, {})
+
+/* FSGNJ.S */
+CONSTOPT(fsgnjs, {})
+
+/* FSGNJN.S */
+CONSTOPT(fsgnjns, {})
+
+/* FSGNJX.S */
+CONSTOPT(fsgnjxs, {})
+
+/* FMIN.S
+ * In IEEE754-201x, fmin(x, y) return
+ * - min(x,y) if both numbers are not NaN
+ * - if one is NaN and another is a number, return the number
+ * - if both are NaN, return NaN
+ * When input is signaling NaN, raise invalid operation
+ */
+CONSTOPT(fmins, {})
+
+/* FMAX.S */
+CONSTOPT(fmaxs, {})
+
+/* FCVT.W.S and FCVT.WU.S convert a floating point number to an integer,
+ * the rounding mode is specified in rm field.
+ */
+
+/* The F instructions from FCVT.W.S to FCLASS.S below write their result to
+ * integer register rd. Nothing is folded for them, so rd must be marked
+ * non-constant to keep an earlier recorded value from being reused.
+ */
+
+/* FCVT.W.S */
+CONSTOPT(fcvtws, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* FCVT.WU.S */
+CONSTOPT(fcvtwus, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* FMV.X.W */
+CONSTOPT(fmvxw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* FEQ.S performs a quiet comparison: it only sets the invalid operation
+ * exception flag if either input is a signaling NaN.
+ */
+CONSTOPT(feqs, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* FLT.S and FLE.S perform what the IEEE 754-2008 standard refers to as
+ * signaling comparisons: that is, they set the invalid operation exception
+ * flag if either input is NaN.
+ */
+CONSTOPT(flts, { const_opt_info->is_constant[ir->rd] = false; })
+
+CONSTOPT(fles, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* FCLASS.S */
+CONSTOPT(fclasss, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* The remaining conversions and moves write a floating-point register. */
+
+/* FCVT.S.W */
+CONSTOPT(fcvtsw, {})
+
+/* FCVT.S.WU */
+CONSTOPT(fcvtswu, {})
+
+/* FMV.W.X */
+CONSTOPT(fmvwx, {})
+#endif
+
+/* RV32C Standard Extension */
+
+#if RV32_HAS(EXT_C)
+/* C.ADDI4SPN is a CIW-format instruction that adds a zero-extended non-zero
+ * immediate, scaled by 4, to the stack pointer, x2, and writes the result to
+ * rd'.
+ * This instruction is used to generate pointers to stack-allocated variables,
+ * and expands to addi rd', x2, nzuimm[9:2].
+ */
+CONSTOPT(caddi4spn, {
+    if (const_opt_info->is_constant[rv_reg_sp]) {
+        /* sp is known: fold sp + imm and rewrite as C.LUI carrying the sum. */
+        ir->imm = const_opt_info->const_val[rv_reg_sp] + (uint16_t) ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.LW loads a 32-bit value from memory into register rd'. It computes an
+ * effective address by adding the zero-extended offset, scaled by 4, to the
+ * base address in register rs1'. It expands to lw rd', offset[6:2](rs1').
+ */
+CONSTOPT(clw, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* C.SW stores a 32-bit value in register rs2' to memory. It computes an
+ * effective address by adding the zero-extended offset, scaled by 4, to the
+ * base address in register rs1'.
+ * It expands to sw rs2', offset[6:2](rs1').
+ */
+CONSTOPT(csw, {})
+
+/* C.NOP is mapped to NOP */
+
+/* C.ADDI adds the non-zero sign-extended 6-bit immediate to the value in
+ * register rd then writes the result to rd. C.ADDI expands into
+ * addi rd, rd, nzimm[5:0]. C.ADDI is only valid when rd != x0. The code point
+ * with both rd=x0 and nzimm=0 encodes the C.NOP instruction; the remaining
+ * code points with either rd=x0 or nzimm=0 encode HINTs.
+ */
+CONSTOPT(caddi, {
+    if (const_opt_info->is_constant[ir->rd]) {
+        /* The immediate is signed. Casting it to uint16_t would turn a
+         * negative immediate into a large positive addend, so sign-extend
+         * from 16 bits instead.
+         * NOTE(review): assumes the decoder stores the immediate
+         * sign-extended to at least 16 bits -- confirm against the decoder.
+         */
+        ir->imm = const_opt_info->const_val[ir->rd] + (int16_t) ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.JAL */
+CONSTOPT(cjal, {
+    /* C.JAL always links through ra, so both tables are indexed with ra;
+     * the value is the address of the following instruction.
+     */
+    const_opt_info->is_constant[rv_reg_ra] = true;
+    const_opt_info->const_val[rv_reg_ra] = ir->pc + ir->insn_len;
+})
+
+/* C.LI loads the sign-extended 6-bit immediate, imm, into register rd.
+ * C.LI expands into addi rd, x0, imm[5:0].
+ * C.LI is only valid when rd != x0; the code points with rd=x0 encode HINTs.
+ */
+CONSTOPT(cli, {
+    const_opt_info->is_constant[ir->rd] = true;
+    const_opt_info->const_val[ir->rd] = ir->imm;
+})
+
+/* C.ADDI16SP is used to adjust the stack pointer in procedure prologues
+ * and epilogues. It expands into addi x2, x2, nzimm[9:4].
+ * C.ADDI16SP is only valid when nzimm != 0; the code point with nzimm=0 is
+ * reserved.
+ */
+CONSTOPT(caddi16sp, {
+    if (const_opt_info->is_constant[ir->rd]) {
+        ir->imm = const_opt_info->const_val[ir->rd] + ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.LUI loads the non-zero 6-bit immediate field into bits 17–12 of the
+ * destination register, clears the bottom 12 bits, and sign-extends bit
+ * 17 into all higher bits of the destination.
+ * C.LUI expands into lui rd, nzimm[17:12].
+ * C.LUI is only valid when rd is neither x0 nor x2, and when the immediate is
+ * not equal to zero.
+ */
+CONSTOPT(clui, {
+    const_opt_info->is_constant[ir->rd] = true;
+    const_opt_info->const_val[ir->rd] = ir->imm;
+})
+
+/* C.SRLI is a CB-format instruction that performs a logical right shift
+ * of the value in register rd' then writes the result to rd'. The shift
+ * amount is encoded in the shift amount field. C.SRLI expands into
+ * srli rd', rd', shamt[5:0].
+ */
+CONSTOPT(csrli, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        /* Source and destination are the same register, which this handler
+         * addresses as rs1; the folded value is therefore recorded for rs1.
+         */
+        ir->imm = const_opt_info->const_val[ir->rs1] >> ir->shamt;
+        const_opt_info->is_constant[ir->rs1] = true;
+        const_opt_info->const_val[ir->rs1] = ir->imm;
+        /* C.LUI names its destination through rd, so point rd at the
+         * shifted register before rewriting.
+         * NOTE(review): no-op if the decoder already sets rd = rs1.
+         */
+        ir->rd = ir->rs1;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rs1] = false;
+    }
+})
+
+/* C.SRAI is defined analogously to C.SRLI, but instead performs an
+ * arithmetic right shift. C.SRAI expands to srai rd', rd', shamt[5:0].
+ */
+CONSTOPT(csrai, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        const uint32_t mask = 0x80000000 & const_opt_info->const_val[ir->rs1];
+        ir->imm = const_opt_info->const_val[ir->rs1] >> ir->shamt;
+        /* Replicate the original sign bit into the vacated high bits. */
+        for (unsigned int i = 0; i < ir->shamt; ++i)
+            ir->imm |= mask >> i;
+        /* Source and destination are the register named by rs1, so the
+         * folded value is recorded there.
+         */
+        const_opt_info->is_constant[ir->rs1] = true;
+        const_opt_info->const_val[ir->rs1] = ir->imm;
+        /* C.LUI names its destination through rd.
+         * NOTE(review): no-op if the decoder already sets rd = rs1.
+         */
+        ir->rd = ir->rs1;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rs1] = false;
+    }
+})
+
+/* C.ANDI is a CB-format instruction that computes the bitwise AND of the
+ * value in register rd' and the sign-extended 6-bit immediate, then writes
+ * the result to rd'. C.ANDI expands to andi rd', rd', imm[5:0].
+ */
+CONSTOPT(candi, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        /* The operand is the immediate, not the shift amount. */
+        ir->imm = const_opt_info->const_val[ir->rs1] & ir->imm;
+        const_opt_info->is_constant[ir->rs1] = true;
+        const_opt_info->const_val[ir->rs1] = ir->imm;
+        /* C.LUI names its destination through rd.
+         * NOTE(review): no-op if the decoder already sets rd = rs1.
+         */
+        ir->rd = ir->rs1;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rs1] = false;
+    }
+})
+
+/* C.SUB */
+CONSTOPT(csub, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] -
+                  const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.XOR */
+CONSTOPT(cxor, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] ^
+                  const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.OR */
+CONSTOPT(cor, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] |
+                  const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.AND */
+CONSTOPT(cand, {
+    if (const_opt_info->is_constant[ir->rs1] &&
+        const_opt_info->is_constant[ir->rs2]) {
+        ir->imm = const_opt_info->const_val[ir->rs1] &
+                  const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.J performs an unconditional control transfer. The offset is sign-extended
+ * and added to the pc to form the jump target address.
+ * C.J can therefore target a ±2 KiB range.
+ * C.J expands to jal x0, offset[11:1].
+ */
+CONSTOPT(cj, {})
+
+/* C.BEQZ performs conditional control transfers. The offset is sign-extended
+ * and added to the pc to form the branch target address.
+ * It can therefore target a ±256 B range. C.BEQZ takes the branch if the
+ * value in register rs1' is zero. It expands to beq rs1', x0, offset[8:1].
+ */
+CONSTOPT(cbeqz, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        /* Known non-zero: the branch falls through, so jump to the next
+         * instruction; otherwise keep the branch offset. Either way the
+         * instruction becomes an unconditional C.J.
+         */
+        if (const_opt_info->const_val[ir->rs1])
+            ir->imm = ir->insn_len;
+        ir->opcode = rv_insn_cj;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* C.BNEZ is defined analogously to C.BEQZ, but it takes the branch if the
+ * value in register rs1' is not zero.
+ */
+CONSTOPT(cbnez, {
+    if (const_opt_info->is_constant[ir->rs1]) {
+        /* Known zero: the branch falls through. */
+        if (!const_opt_info->const_val[ir->rs1])
+            ir->imm = ir->insn_len;
+        ir->opcode = rv_insn_cj;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* C.SLLI is a CI-format instruction that performs a logical left shift of
+ * the value in register rd then writes the result to rd. The shift amount
+ * is encoded in the shamt field. C.SLLI expands into slli rd, rd, shamt[5:0].
+ */
+CONSTOPT(cslli, {
+    if (const_opt_info->is_constant[ir->rd]) {
+        /* rd is both source and destination, so the shifted value must be
+         * the one recorded for rd (the register just checked), not rs2.
+         */
+        ir->imm = const_opt_info->const_val[ir->rd] << (uint8_t) ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.LWSP */
+CONSTOPT(clwsp, { const_opt_info->is_constant[ir->rd] = false; })
+
+/* C.JR */
+CONSTOPT(cjr, {})
+
+/* C.MV */
+CONSTOPT(cmv, {
+    if (const_opt_info->is_constant[ir->rs2]) {
+        /* Copy of a known value: rewrite as C.LUI carrying that value. */
+        ir->imm = const_opt_info->const_val[ir->rs2];
+        const_opt_info->is_constant[ir->rd] = true;
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    } else {
+        const_opt_info->is_constant[ir->rd] = false;
+    }
+})
+
+/* C.EBREAK */
+CONSTOPT(cebreak, {})
+
+/* C.JALR */
+CONSTOPT(cjalr, {
+    /* C.JALR always links through ra, so both tables are indexed with ra;
+     * the value is the address of the following instruction.
+     */
+    const_opt_info->is_constant[rv_reg_ra] = true;
+    const_opt_info->const_val[rv_reg_ra] = ir->pc + ir->insn_len;
+})
+
+/* C.ADD adds the values in registers rd and rs2 and writes the result to
+ * register rd.
+ * C.ADD expands into add rd, rd, rs2.
+ * C.ADD is only valid when rs2 != x0; the code points with rs2=x0 correspond
+ * to the C.JALR and C.EBREAK instructions. The code points with rs2 != x0 and
+ * rd=x0 are HINTs.
+ */
+CONSTOPT(cadd, {
+    if (!const_opt_info->is_constant[ir->rs1] ||
+        !const_opt_info->is_constant[ir->rs2]) {
+        /* At least one operand is unknown, so the sum is unknown too. */
+        const_opt_info->is_constant[ir->rd] = false;
+    } else {
+        /* Both operands are known: fold the sum and rewrite as C.LUI. */
+        ir->imm = const_opt_info->const_val[ir->rs1] +
+                  const_opt_info->const_val[ir->rs2];
+        const_opt_info->const_val[ir->rd] = ir->imm;
+        const_opt_info->is_constant[ir->rd] = true;
+        ir->opcode = rv_insn_clui;
+        ir->impl = dispatch_table[ir->opcode];
+    }
+})
+
+/* C.SWSP */
+CONSTOPT(cswsp, {})
+#endif
\ No newline at end of file diff --git a/src/decode.h b/src/decode.h index 4c7a02e87..40be29406 100644 --- a/src/decode.h +++ b/src/decode.h @@ -270,7 +270,7 @@ typedef struct rv_insn { /* fuse operation */ int32_t imm2; opcode_fuse_t *fuse; - + uint32_t pc; /* instruction length */ uint8_t insn_len; diff --git a/src/emulate.c b/src/emulate.c index f9c67904c..e192b6021 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -570,6 +570,7 @@ static void block_translate(riscv_t *rv, block_t *block) break; } ir->impl = dispatch_table[ir->opcode]; + ir->pc = block->pc_end; /* compute the end of pc */ block->pc_end += ir->insn_len; block->n_insn++; @@ -832,88 +833,98 @@ static bool detect_memcpy(riscv_t *rv, int lib) return true; } -/* Check if instructions in a block match a specific pattern. If they do, - * rewrite them as fused instructions. - * - * Strategies are being devised to increase the number of instructions that - * match the pattern, including possible instruction reordering.
+ - */ -static void match_pattern(riscv_t *rv, block_t *block) +static bool stdlib_injection(riscv_t *rv, block_t *block) { - for (uint32_t i = 0; i < block->n_insn - 1; i++) { - rv_insn_t *ir = block->ir + i, *next_ir = NULL; - int32_t count = 0, sign = 1; - switch (ir->opcode) { - case rv_insn_addi: - /* Compare the target block with the first basic block of - * memset/memcpy, if two block is match, we would extract the - * instruction sequence starting from the pc_start of the basic - * block and then compare it with the pre-recorded memset/memcpy - * instruction sequence. - */ - if (ir->imm == 15 && ir->rd == rv_reg_t1 && - ir->rs1 == rv_reg_zero) { - next_ir = ir + 1; - if (next_ir->opcode == rv_insn_addi && - next_ir->rd == rv_reg_a4 && next_ir->rs1 == rv_reg_a0 && - next_ir->rs2 == rv_reg_zero) { - next_ir = next_ir + 1; - if (next_ir->opcode == rv_insn_bgeu && next_ir->imm == 60 && - next_ir->rs1 == rv_reg_t1 && - next_ir->rs2 == rv_reg_a2) { - if (detect_memset(rv, 1)) { - ir->opcode = rv_insn_fuse6; - ir->impl = dispatch_table[ir->opcode]; - ir->tailcall = true; - }; - } - } - } else if (ir->imm == 0 && ir->rd == rv_reg_t1 && - ir->rs1 == rv_reg_a0) { - next_ir = ir + 1; - if (next_ir->opcode == rv_insn_beq && - next_ir->rs1 == rv_reg_a2 && next_ir->rs2 == rv_reg_zero) { - if (next_ir->imm == 20 && detect_memset(rv, 2)) { + rv_insn_t *ir = block->ir, *next_ir = NULL; /* patterns start at the block's first instruction */ + switch (ir->opcode) { + case rv_insn_addi: + /* Compare the target block with the first basic block of + * memset/memcpy. If the two blocks match, we extract the + * instruction sequence starting from the pc_start of the basic + * block and then compare it with the pre-recorded memset/memcpy + * instruction sequence. 
+ */ + if (ir->imm == 15 && ir->rd == rv_reg_t1 && ir->rs1 == rv_reg_zero) { + next_ir = ir + 1; + if (next_ir->opcode == rv_insn_addi && next_ir->rd == rv_reg_a4 && + next_ir->rs1 == rv_reg_a0 && next_ir->rs2 == rv_reg_zero) { + next_ir = next_ir + 1; + if (next_ir->opcode == rv_insn_bgeu && next_ir->imm == 60 && + next_ir->rs1 == rv_reg_t1 && next_ir->rs2 == rv_reg_a2) { + if (detect_memset(rv, 1)) { ir->opcode = rv_insn_fuse6; ir->impl = dispatch_table[ir->opcode]; ir->tailcall = true; - } else if (next_ir->imm == 28 && detect_memcpy(rv, 2)) { - ir->opcode = rv_insn_fuse7; - ir->impl = dispatch_table[ir->opcode]; - ir->tailcall = true; + return true; }; } } - break; - case rv_insn_xor: - /* Compare the target block with the first basic block of memcpy, if - * two block is match, we would extract the instruction sequence - * starting from the pc_start of the basic block and then compare - * it with the pre-recorded memcpy instruction sequence. - */ - if (ir->rd == rv_reg_a5 && ir->rs1 == rv_reg_a0 && - ir->rs2 == rv_reg_a1) { - next_ir = ir + 1; - if (next_ir->opcode == rv_insn_andi && next_ir->imm == 3 && - next_ir->rd == rv_reg_a5 && next_ir->rs1 == rv_reg_a5) { + } else if (ir->imm == 0 && ir->rd == rv_reg_t1 && + ir->rs1 == rv_reg_a0) { + next_ir = ir + 1; + if (next_ir->opcode == rv_insn_beq && next_ir->rs1 == rv_reg_a2 && + next_ir->rs2 == rv_reg_zero) { + if (next_ir->imm == 20 && detect_memset(rv, 2)) { + ir->opcode = rv_insn_fuse6; + ir->impl = dispatch_table[ir->opcode]; + ir->tailcall = true; + return true; + } else if (next_ir->imm == 28 && detect_memcpy(rv, 2)) { + ir->opcode = rv_insn_fuse7; + ir->impl = dispatch_table[ir->opcode]; + ir->tailcall = true; + return true; + }; + } + } + break; + case rv_insn_xor: + /* Compare the target block with the first basic block of memcpy. If + * the two blocks match, we extract the instruction sequence + * starting from the pc_start of the basic block and then compare + * it with the pre-recorded memcpy 
instruction sequence. + */ + if (ir->rd == rv_reg_a5 && ir->rs1 == rv_reg_a0 && + ir->rs2 == rv_reg_a1) { + next_ir = ir + 1; + if (next_ir->opcode == rv_insn_andi && next_ir->imm == 3 && + next_ir->rd == rv_reg_a5 && next_ir->rs1 == rv_reg_a5) { + next_ir = next_ir + 1; + if (next_ir->opcode == rv_insn_add && + next_ir->rd == rv_reg_a7 && next_ir->rs1 == rv_reg_a0 && + next_ir->rs2 == rv_reg_a2) { next_ir = next_ir + 1; - if (next_ir->opcode == rv_insn_add && - next_ir->rd == rv_reg_a7 && next_ir->rs1 == rv_reg_a0 && - next_ir->rs2 == rv_reg_a2) { - next_ir = next_ir + 1; - if (next_ir->opcode == rv_insn_bne && - next_ir->imm == 104 && next_ir->rs1 == rv_reg_a5 && - next_ir->rs2 == rv_reg_zero) { - if (detect_memcpy(rv, 1)) { - ir->opcode = rv_insn_fuse7; - ir->impl = dispatch_table[ir->opcode]; - ir->tailcall = true; - }; - } + if (next_ir->opcode == rv_insn_bne && next_ir->imm == 104 && + next_ir->rs1 == rv_reg_a5 && + next_ir->rs2 == rv_reg_zero) { + if (detect_memcpy(rv, 1)) { + ir->opcode = rv_insn_fuse7; + ir->impl = dispatch_table[ir->opcode]; + ir->tailcall = true; + return true; + }; } } } - break; + } + break; + /* TODO: inject other frequently-used standard library functions */ + } + return false; /* no pre-recorded library routine matched */ +} + +/* Check if instructions in a block match a specific pattern. If they do, + * rewrite them as fused instructions. + * + * Strategies are being devised to increase the number of instructions that + * match the pattern, including possible instruction reordering. 
+ */ +static void match_pattern(block_t *block) +{ + for (uint32_t i = 0; i < block->n_insn - 1; i++) { + rv_insn_t *ir = block->ir + i, *next_ir = NULL; + int32_t count = 0, sign = 1; + switch (ir->opcode) { case rv_insn_auipc: next_ir = ir + 1; if (next_ir->opcode == rv_insn_addi && ir->rd == next_ir->rs1) { @@ -974,6 +985,46 @@ static void match_pattern(riscv_t *rv, block_t *block) } } + +/* Per-block constant-propagation state: is_constant[i] records whether + * register i currently holds a known value, and const_val[i] is that value. + */ +typedef struct const_opt_info { + bool is_constant[32]; + uint32_t const_val[32]; +} const_opt_info_t; + +#define CONSTOPT(inst, code) \ + static void const_opt_##inst(UNUSED rv_insn_t *ir, \ + UNUSED const_opt_info_t *const_opt_info) \ + { \ + code; \ + } + +#include "const_opt.c" +typedef void (*opt_func)(rv_insn_t *, const_opt_info_t *); +/* clang-format off */ +static const opt_func const_opt_table[] = { + /* RV32 instructions */ +#define _(inst, can_branch, reg_mask) [rv_insn_##inst] = const_opt_##inst, + RV_INSN_LIST +#undef _ +}; +/* clang-format on */ +/* Run the per-opcode handler on every instruction of the block, in order. + * Handlers update the tracked constants and may rewrite the instruction. + */ +static void constant_opt(block_t *block) +{ + const_opt_info_t const_opt_info; + memset(&const_opt_info, 0, sizeof(const_opt_info)); + const_opt_info.is_constant[0] = true; /* register 0 is the constant 0 (const_val zeroed above) */ + for (uint32_t i = 0; i < block->n_insn; i++) { + rv_insn_t *ir = block->ir + i; + const_opt_table[ir->opcode](ir, &const_opt_info); + } +} + static block_t *prev = NULL; static block_t *block_find_or_translate(riscv_t *rv) { @@ -992,12 +1043,15 @@ static block_t *block_find_or_translate(riscv_t *rv) /* translate the basic block */ block_translate(rv, next); + + if (!stdlib_injection(rv, next)) { + constant_opt(next); #if RV32_HAS(GDBSTUB) - if (likely(!rv->debug_mode)) + if (likely(!rv->debug_mode)) #endif - /* macro operation fusion */ - match_pattern(rv, next); - + /* macro operation fusion */ + match_pattern(next); + } /* insert the block into block map */ block_insert(&rv->block_map, next);