Skip to content

Commit

Permalink
feat: optimize GCC interpret flags
Browse files Browse the repository at this point in the history
  • Loading branch information
edubart committed Oct 28, 2024
1 parent 04a4be3 commit 8fa2f85
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
16 changes: 10 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,21 @@ ifneq (,$(filter gcc,$(CC)))
# saving some host instructions and improving performance.
# This flag is usually enabled by default at -O3,
# but we don't use -O3 because it enables some other flags that are not worthwhile for the interpreter.
INTERPRET_CXXFLAGS+=-fgcse-after-reload -fpredictive-commoning -fsplit-paths -ftree-partial-pre -fpeel-loops
# GCC manual says that we should disable gcse when using computed gotos
INTERPRET_CXXFLAGS+=-fno-gcse
INTERPRET_CXXFLAGS+=-fgcse-after-reload -fpeel-loops
# The interpreter dispatch loop performs better as a big inlined function
INTERPRET_CXXFLAGS+=-finline-limit=1024
# The following optimization improves register allocation in the interpret hot loop
INTERPRET_CXXFLAGS+=-funroll-loops
INTERPRET_CXXFLAGS+=$(MYINTERPRET_CXXFLAGS)
# The interpreter hot loop is big and puts pressure on register allocation, this improves register use
INTERPRET_CXXFLAGS+=-frename-registers -fweb
# The interpreter instruction dispatch is ordered by hand, we don't want the compiler to shuffle it
INTERPRET_CXXFLAGS+=-freorder-blocks-algorithm=simple
# The following is known to save instructions in the hot loop
INTERPRET_CXXFLAGS+=-fgcse-sm
endif
endif

# Make testing new optimization options easier
INTERPRET_CXXFLAGS+=$(MYINTERPRET_CXXFLAGS)

# Link time optimizations
ifeq ($(lto),yes)
OPTFLAGS+=-flto=auto
Expand Down
2 changes: 1 addition & 1 deletion src/interpret.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2011,7 +2011,7 @@ static NO_INLINE execute_status write_csr_satp(STATE_ACCESS &a, uint64_t val) {
}

template <typename STATE_ACCESS>
static execute_status write_csr_mstatus(STATE_ACCESS &a, uint64_t val) {
static NO_INLINE execute_status write_csr_mstatus(STATE_ACCESS &a, uint64_t val) {
const uint64_t old_mstatus = a.read_mstatus() & MSTATUS_R_MASK;

// M-mode software can determine whether a privilege mode is implemented
Expand Down

0 comments on commit 8fa2f85

Please sign in to comment.