Skip to content

Commit

Permalink
feat: optimize decoding of compressed instructions signed imm
Browse files Browse the repository at this point in the history
  • Loading branch information
edubart committed Oct 31, 2024
1 parent 257139a commit fff9cea
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ INTERPRET_CXXFLAGS+=-fgcse-after-reload -fpeel-loops
INTERPRET_CXXFLAGS+=-finline-limit=1024
# The interpreter hot loop is big and puts pressure on register allocation, this improves register use
INTERPRET_CXXFLAGS+=-frename-registers -fweb
# The interpreter instruction dispatch is ordered by hand, we don't want the compiler to shuffle it
# The interpreter instruction dispatch is big, we don't want the compiler duplicating its blocks.
INTERPRET_CXXFLAGS+=-freorder-blocks-algorithm=simple
# The following is known to save instructions in the hot loop
INTERPRET_CXXFLAGS+=-fgcse-sm
Expand Down
5 changes: 3 additions & 2 deletions src/i-state-access.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <type_traits>
#include <utility>

#include "compiler-defines.h"
#include "meta.h"
#include "shadow-tlb.h"

Expand Down Expand Up @@ -773,15 +774,15 @@ class i_state_access { // CRTP
}

/// \brief Invalidates all TLB entries of all types.
void flush_all_tlb() {
NO_INLINE void flush_all_tlb() {
derived().template flush_tlb_type<TLB_CODE>();
derived().template flush_tlb_type<TLB_READ>();
derived().template flush_tlb_type<TLB_WRITE>();
}

/// \brief Invalidates TLB entries for a specific virtual address.
/// \param vaddr Target virtual address.
void flush_tlb_vaddr(uint64_t vaddr) {
NO_INLINE void flush_tlb_vaddr(uint64_t vaddr) {
return derived().do_flush_tlb_vaddr(vaddr);
}

Expand Down
46 changes: 32 additions & 14 deletions src/interpret.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ static inline uint32_t csr_priv(CSR_address csr) {
/// \param new_prv New privilege level.
/// \details This function is forced inline (it was previously outlined to minimize host CPU code cache pressure).
template <typename STATE_ACCESS>
static NO_INLINE void set_priv(STATE_ACCESS a, int new_prv) {
static FORCE_INLINE void set_priv(STATE_ACCESS a, int new_prv) {
INC_COUNTER(a.get_statistics(), priv_level[new_prv]);
a.write_iflags_PRV(new_prv);
// Invalidate all TLB entries
Expand Down Expand Up @@ -706,19 +706,29 @@ static inline uint32_t insn_get_CR_CSS_rs2(uint32_t insn) {
/// \param insn Instruction.
/// \return Immediate assembled from the scattered instruction bits into imm[11:1] (bit 0 is always zero),
/// sign-extended from imm[11] to 32 bits.
/// \details This function is forced to be inline because GCC may not always inline it.
static FORCE_INLINE int32_t insn_get_C_J_imm(uint32_t insn) {
// Diff view, pre-commit body (next four lines): gather the 12-bit value with shift/mask pairs, then sign-extend
// it with a left shift by 20 followed by a signed right shift by 20.
auto imm = static_cast<int32_t>(((insn >> (12 - 11)) & 0x800) | ((insn >> (11 - 4)) & 0x10) |
((insn >> (9 - 8)) & 0x300) | ((insn << (10 - 8)) & 0x400) | ((insn >> (7 - 6)) & 0x40) |
((insn << (7 - 6)) & 0x80) | ((insn >> (3 - 1)) & 0xe) | ((insn << (5 - 2)) & 0x20));
return (imm << 20) >> 20;
// Diff view, post-commit body (remaining lines): insn << 19 moves instruction bit 12 to bit 31, so the signed
// right shift by 20 lands it on bit 11 and copies it into every higher bit; the mask clears bits 10:0, which the
// other terms fill in. Assumes >> on a negative int32_t is an arithmetic shift.
return static_cast<int32_t>(
(static_cast<uint32_t>(static_cast<int32_t>(insn << 19) >> 20) & ~0b11111111111) | // imm[11] and sign bits <- insn[12]
((insn >> (11 - 4)) & 0b10000) | // imm[4] <- insn[11]
((insn >> (9 - 8)) & 0b1100000000) | // imm[9:8] <- insn[10:9]
((insn << (10 - 8)) & 0b10000000000) | // imm[10] <- insn[8]
((insn >> (7 - 6)) & 0b1000000) | // imm[6] <- insn[7]
((insn << (7 - 6)) & 0b10000000) | // imm[7] <- insn[6]
((insn >> (3 - 1)) & 0b1110) | // imm[3:1] <- insn[5:3]
((insn << (5 - 2)) & 0b100000) // imm[5] <- insn[2]
);
}

/// \brief Obtains the immediate value from a C_BEQZ and C_BNEZ instruction.
/// \param insn Instruction.
/// \return Immediate assembled into imm[8:1] (bit 0 is always zero), sign-extended from imm[8] to 32 bits.
/// \details This function is forced to be inline because GCC may not always inline it.
static FORCE_INLINE int32_t insn_get_C_BEQZ_BNEZ_imm(uint32_t insn) {
// Diff view, pre-commit body (next three lines): gather the 9-bit value, then sign-extend it with a
// left shift by 23 followed by a signed right shift by 23.
auto imm = static_cast<int32_t>(((insn >> (12 - 8)) & 0x100) | ((insn >> (10 - 3)) & 0x18) |
((insn << (6 - 5)) & 0xc0) | ((insn >> (3 - 1)) & 0x6) | ((insn << (5 - 2)) & 0x20));
return (imm << 23) >> 23;
// Diff view, post-commit body (remaining lines): insn << 19 moves instruction bit 12 to bit 31, so the signed
// right shift by 23 lands it on bit 8 and copies it into every higher bit; the mask clears bits 7:0, which the
// other terms fill in. Assumes >> on a negative int32_t is an arithmetic shift.
return static_cast<int32_t>(
(static_cast<uint32_t>(static_cast<int32_t>(insn << 19) >> 23) & ~0b11111111) | // imm[8] and sign bits <- insn[12]
((insn >> 7) & 0b11000) | // imm[4:3] <- insn[11:10]
((insn << 1) & 0b11000000) | // imm[7:6] <- insn[6:5]
((insn >> 2) & 0b110) | // imm[2:1] <- insn[4:3]
((insn << 3) & 0b100000) // imm[5] <- insn[2]
);
}

/// \brief Obtains the immediate value from a CL/CS-type instruction.
Expand All @@ -739,7 +749,9 @@ static FORCE_INLINE uint32_t insn_get_CI_CB_imm(uint32_t insn) {
/// \param insn Instruction.
/// \return 6-bit immediate imm[5:0], sign-extended from imm[5] to 32 bits.
/// \details This function is forced to be inline because GCC may not always inline it.
static FORCE_INLINE int32_t insn_get_CI_CB_imm_se(uint32_t insn) {
// Diff view, pre-commit body (next line): take the unsigned result of insn_get_CI_CB_imm() and sign-extend it
// from bit 5 with a left shift by 26 followed by a signed right shift by 26.
return static_cast<int32_t>(insn_get_CI_CB_imm(insn) << 26) >> 26;
// Diff view, post-commit body (remaining lines): decode directly from insn. insn << 19 moves instruction bit 12
// to bit 31, so the signed right shift by 26 lands it on bit 5 and copies it into every higher bit; the mask
// clears bits 4:0. Assumes >> on a negative int32_t is an arithmetic shift.
return static_cast<int32_t>((static_cast<uint32_t>(static_cast<int32_t>(insn << 19) >> 26) & ~0b11111) | // imm[5] and sign bits <- insn[12]
((insn >> 2) & 0b11111) // imm[4:0] <- insn[6:2]
);
}

/// \brief Obtains the immediate value from a C.LW and C.SW instructions.
Expand All @@ -761,17 +773,23 @@ static FORCE_INLINE uint32_t insn_get_CIW_imm(uint32_t insn) {
/// \param insn Instruction.
/// \return Immediate assembled into imm[9:4] (bits 3:0 are always zero), sign-extended from imm[9] to 32 bits.
/// \details This function is forced to be inline because GCC may not always inline it.
static FORCE_INLINE int32_t insn_get_C_ADDI16SP_imm(uint32_t insn) {
// Diff view, pre-commit body (next three lines): gather the 10-bit value, then sign-extend it with a
// left shift by 22 followed by a signed right shift by 22.
auto imm = static_cast<int32_t>(((insn >> (12 - 9)) & 0x200) | ((insn >> (6 - 4)) & 0x10) |
((insn << (6 - 5)) & 0x40) | ((insn << (7 - 3)) & 0x180) | ((insn << (5 - 2)) & 0x20));
return (imm << 22) >> 22;
// Diff view, post-commit body (remaining lines): insn << 19 moves instruction bit 12 to bit 31, so the signed
// right shift by 22 lands it on bit 9 and copies it into every higher bit; the mask clears bits 8:0, which the
// other terms fill in. Assumes >> on a negative int32_t is an arithmetic shift.
return static_cast<int32_t>(
(static_cast<uint32_t>(static_cast<int32_t>(insn << 19) >> 22) & ~0b111111111) | // imm[9] and sign bits <- insn[12]
((insn >> 2) & 0b10000) | // imm[4] <- insn[6]
((insn << 1) & 0b1000000) | // imm[6] <- insn[5]
((insn << 4) & 0b110000000) | // imm[8:7] <- insn[4:3]
((insn << 3) & 0b100000) // imm[5] <- insn[2]
);
}

/// \brief Obtains the immediate value from a C.LUI instruction.
/// \param insn Instruction.
/// \return Immediate assembled into imm[17:12] (bits 11:0 are always zero), sign-extended from imm[17] to 32 bits.
/// \details This function is forced to be inline because GCC may not always inline it.
static FORCE_INLINE int32_t insn_get_C_LUI_imm(uint32_t insn) {
// Diff view, pre-commit body (next two lines): gather the 18-bit value, then sign-extend it with a
// left shift by 14 followed by a signed right shift by 14.
auto imm = static_cast<int32_t>(((insn << (17 - 12)) & 0x20000) | ((insn << (12 - 2)) & 0x1F000));
return (imm << 14) >> 14;
// Diff view, post-commit body (remaining lines): insn << 19 moves instruction bit 12 to bit 31, so the signed
// right shift by 14 lands it on bit 17 and copies it into every higher bit; the mask clears bits 16:0, of which
// bits 16:12 are filled in by the second term. Assumes >> on a negative int32_t is an arithmetic shift.
return static_cast<int32_t>(
(static_cast<uint32_t>(static_cast<int32_t>(insn << 19) >> 14) & ~0b11111111111111111) | // imm[17] and sign bits <- insn[12]
((insn << 10) & 0b11111000000000000) // imm[16:12] <- insn[6:2]
);
}

/// \brief Obtains the immediate value from a C.FLDSP and C.LDSP instructions.
Expand Down
12 changes: 6 additions & 6 deletions src/soft-float.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ struct i_sfloat {
}

/// \brief Min operation.
static F_UINT min(F_UINT a, F_UINT b, uint32_t *pfflags) {
static NO_INLINE F_UINT min(F_UINT a, F_UINT b, uint32_t *pfflags) {
if (isnan(a) || isnan(b)) {
return min_max_nan(a, b, pfflags);
}
Expand All @@ -686,7 +686,7 @@ struct i_sfloat {
}

/// \brief Max operation.
static F_UINT max(F_UINT a, F_UINT b, uint32_t *pfflags) {
static NO_INLINE F_UINT max(F_UINT a, F_UINT b, uint32_t *pfflags) {
if (isnan(a) || isnan(b)) {
return min_max_nan(a, b, pfflags);
}
Expand All @@ -699,7 +699,7 @@ struct i_sfloat {
}

/// \brief Equal operation.
static bool eq(F_UINT a, F_UINT b, uint32_t *pfflags) {
static NO_INLINE bool eq(F_UINT a, F_UINT b, uint32_t *pfflags) {
if (unlikely(isnan(a) || isnan(b))) {
if (issignan(a) || issignan(b)) {
*pfflags |= FFLAGS_NV_MASK;
Expand All @@ -713,7 +713,7 @@ struct i_sfloat {
}

/// \brief Less or equal than operation.
static bool le(F_UINT a, F_UINT b, uint32_t *pfflags) {
static NO_INLINE bool le(F_UINT a, F_UINT b, uint32_t *pfflags) {
if (unlikely(isnan(a) || isnan(b))) {
*pfflags |= FFLAGS_NV_MASK;
return false;
Expand All @@ -727,7 +727,7 @@ struct i_sfloat {
}

/// \brief Less than operation.
static bool lt(F_UINT a, F_UINT b, uint32_t *pfflags) { // NOLINT(misc-confusable-identifiers)
static NO_INLINE bool lt(F_UINT a, F_UINT b, uint32_t *pfflags) { // NOLINT(misc-confusable-identifiers)
if (unlikely(isnan(a) || isnan(b))) {
*pfflags |= FFLAGS_NV_MASK;
return false;
Expand All @@ -741,7 +741,7 @@ struct i_sfloat {
}

/// \brief Retrieves float class.
static uint32_t fclass(F_UINT a) {
static NO_INLINE uint32_t fclass(F_UINT a) {
const uint32_t a_sign = a >> (F_SIZE - 1);
const int32_t a_exp = (a >> MANT_SIZE) & EXP_MASK;
const F_UINT a_mant = a & MANT_MASK;
Expand Down

0 comments on commit fff9cea

Please sign in to comment.