Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add llama perf test case #555

Merged
merged 10 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ The `tests/` contains the testcases. There are four types of testcases:
- intrinsic
- mlir
- codegen
- perf

To add new testcases for asm/intrinsic/mlir, create a new directory with `default.nix` and source files.
Refer to the existing code for more information on how to write the nix file.
Expand Down
137 changes: 111 additions & 26 deletions ipemu/csrc/elf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,125 @@

#include <fmt/core.h>
#include <linux/elf.h>
#include <endian.h>

simple_sim::load_elf_result_t simple_sim::load_elf(const std::string &fname) {
/// Convert an integral value held in little-endian byte order to host byte order.
///
/// @tparam T integral type; sizes above 8 bytes are rejected at compile time.
/// @param value the raw value as it was read from a little-endian source.
/// @return the same value in host byte order (single-byte values are returned as-is).
template<typename T>
T from_le(T value) {
  static_assert(std::is_integral<T>::value, "T must be an integral type");
  static_assert(sizeof(T) <= 8, "Unsupported type size");

  // Dispatch on the byte width at compile time so only the matching
  // <endian.h> conversion is instantiated for a given T.
  constexpr auto width = sizeof(T);
  if constexpr (width == 8) {
    return le64toh(value);
  } else if constexpr (width == 4) {
    return le32toh(value);
  } else if constexpr (width == 2) {
    return le16toh(value);
  } else if constexpr (width == 1) {
    return value;
  }
}

/// Read `size` bytes starting at absolute stream position `offset` into `dst`.
///
/// The stream's state flags are cleared first, so flags left behind by an
/// earlier operation (for example a short read that hit end-of-file) do not
/// prevent this seek and read.
///
/// @param ifs    input stream to read from
/// @param offset absolute position handed to seekg
/// @param size   number of bytes requested
/// @param dst    destination buffer; the caller must provide `size` writable bytes
void copy_from_fs(std::ifstream &ifs, std::streamoff offset, std::streamoff size, void *dst) {
  auto *out = static_cast<char *>(dst);
  ifs.clear();
  ifs.seekg(offset).read(out, size);
}

/// Read one object of type T from `ifs` at absolute stream position `offset`.
///
/// The object is value-initialised and then overwritten with sizeof(T) raw
/// bytes from the stream; no byte-order conversion is applied here.
///
/// @tparam T     type of the object to read
/// @param ifs    input stream to read from
/// @param offset absolute position of the object within the stream
/// @return the object filled from the stream contents
template<typename T>
T read_from_fs(std::ifstream &ifs, std::streamoff offset) {
  T value{};
  const std::streamoff byte_count = sizeof(T);
  copy_from_fs(ifs, offset, byte_count, &value);
  return value;
}

simple_sim::load_elf_result_t simple_sim::load_elf32_little_endian(const std::string &fname) {
try {
std::ifstream fs(fname, std::ios::binary);
fs.exceptions(std::ios::failbit);

Elf32_Ehdr ehdr;
fs.read(reinterpret_cast<char *>(&ehdr), sizeof(ehdr));
CHECK(ehdr.e_machine == EM_RISCV && ehdr.e_type == ET_EXEC &&
ehdr.e_ident[EI_CLASS] == ELFCLASS32,
"ehdr check failed when loading elf");
CHECK_EQ(ehdr.e_phentsize, sizeof(elf32_phdr),
"ehdr.e_phentsize does not equal to elf32_phdr");

for (size_t i = 0; i < ehdr.e_phnum; i++) {
auto phdr_offset = ehdr.e_phoff + i * ehdr.e_phentsize;
Elf32_Phdr phdr;
fs.seekg((long)phdr_offset)
.read(reinterpret_cast<char *>(&phdr), sizeof(phdr));
if (phdr.p_type == PT_LOAD) {
CHECK(phdr.p_paddr + phdr.p_filesz < mem_size,
auto ehdr = read_from_fs<Elf32_Ehdr>(fs, 0);
CHECK(std::memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0, "elf magic not match");
CHECK(ehdr.e_machine == EM_RISCV, "elf not in RISCV");
CHECK(ehdr.e_type == ET_EXEC, "elf not executable");
CHECK(ehdr.e_ident[EI_DATA] == ELFDATA2LSB, "elf not little endian");
CHECK(ehdr.e_ident[EI_CLASS] == ELFCLASS32, "elf not in 32bit");

for (size_t i = 0; i < from_le(ehdr.e_phnum); i++) {
auto phdr_offset = from_le(ehdr.e_phoff) + i * from_le(ehdr.e_phentsize);
auto phdr = read_from_fs<Elf32_Phdr>(fs, (std::streamoff) phdr_offset);
if (from_le(phdr.p_type) == PT_LOAD) {
auto paddr = from_le(phdr.p_paddr);
auto filesz = from_le(phdr.p_filesz);

CHECK(paddr + filesz < mem_size,
"phdr p_paddr + p_filesz check failed");
fs.seekg((long)phdr.p_offset)
.read(reinterpret_cast<char *>(&mem[phdr.p_paddr]), phdr.p_filesz);
Log("LoadElfResult")
.with("segment", i)
.with("phdr_offset", fmt::format("{:08X}", phdr.p_offset))
.with("paddr_range", fmt::format("{:08X}-{:08X}", phdr.p_paddr,
phdr.p_paddr + phdr.p_memsz))
.trace();
fs.clear();
fs.seekg(from_le(phdr.p_offset))
.read(reinterpret_cast<char *>(&mem[paddr]), filesz);
Log("LoadElf")
.with("segment", i)
.with("phdr_offset", fmt::format("{:08X}", phdr.p_offset))
.with("paddr_range", fmt::format("{:08X}-{:08X}", phdr.p_paddr,
phdr.p_paddr + phdr.p_memsz))
.info();
}
}

// read section string section
auto shoff = from_le(ehdr.e_shoff);
auto shentsize = from_le(ehdr.e_shentsize);
auto shstrndx = from_le(ehdr.e_shstrndx);
auto section_string_shdr_offset = shoff + shstrndx * shentsize;
auto section_string_shdr = read_from_fs<Elf32_Shdr>(fs, section_string_shdr_offset);
std::vector<char> section_string_table(from_le(section_string_shdr.sh_size));
copy_from_fs(fs,
from_le(section_string_shdr.sh_offset),
from_le(section_string_shdr.sh_size),
section_string_table.data());

// iterate over section headers to find the symbol string table
std::vector<char> string_table;
for (int i = 0; i < from_le(ehdr.e_shnum); ++i) {
auto shdr = read_from_fs<Elf32_Shdr>(fs, shoff + i * shentsize);
if (from_le(shdr.sh_type) == SHT_STRTAB &&
std::string(&section_string_table[from_le(shdr.sh_name)]) == ".strtab") {
string_table.resize(from_le(shdr.sh_size));
copy_from_fs(fs, from_le(shdr.sh_offset), from_le(shdr.sh_size), string_table.data());
}
}

if (string_table.empty()) {
Log("LoadElf").warn("failed to find .strtab");
} else {
// iterate over section headers to find the symbol table
for (int i = 0; i < from_le(ehdr.e_shnum); ++i) {
auto shdr = read_from_fs<Elf32_Shdr>(fs, shoff + i * shentsize);
if (from_le(shdr.sh_type) == SHT_SYMTAB && std::string(&section_string_table[shdr.sh_name]) == ".symtab") {
auto entsize = from_le(shdr.sh_entsize);
unsigned int num_sym = from_le(shdr.sh_size) / entsize;
for (int j = 0; j < num_sym; ++j) {
auto offset = from_le(shdr.sh_offset) + j * entsize;
auto sym = read_from_fs<Elf32_Sym>(fs, (std::streamoff) offset);

if (ELF32_ST_TYPE(from_le(sym.st_info)) == STT_FUNC) { // Only considering function symbols
// read the name from the string table
std::string name(&string_table.at(from_le(sym.st_name)));
function_symtab[from_le(sym.st_value)] = {.name = name, .info = from_le(sym.st_info)}; // Add to map
}
}
break;
}
}
}

return {.entry_addr = ehdr.e_entry};
} catch (std::ios_base::failure &) {
throw std::system_error{errno, std::generic_category(), fname};
} catch (std::ios_base::failure &f) {
Log("LoadElf")
.with("errno", errno)
.with("fname", fname)
.with("reason", f.what())
.fatal();
}
}
18 changes: 16 additions & 2 deletions ipemu/csrc/simple_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ class simple_sim : public simif_t {
uartlite uart;
reg_t uart_addr = 0x10000000;

// One function symbol taken from the ELF symbol table by the ELF loader.
struct function_sym {
std::string name; // symbol name, copied out of the ELF string table
uint8_t info; // st_info byte of the ELF symbol entry
};
// Function symbols keyed by 32-bit symbol address (the symbol's st_value); get_symbol() looks names up here.
std::map<uint32_t, function_sym> function_symtab;

public:
explicit simple_sim(size_t mem_size) : mem_size(mem_size) {
mem = new char[mem_size];
Expand All @@ -25,7 +31,8 @@ class simple_sim : public simif_t {
struct load_elf_result_t {
uint32_t entry_addr;
};
load_elf_result_t load_elf(const std::string &fname);

load_elf_result_t load_elf32_little_endian(const std::string &fname);

// should return NULL for MMIO addresses
char *addr_to_mem(reg_t addr) override {
Expand Down Expand Up @@ -71,5 +78,12 @@ class simple_sim : public simif_t {
// maybe nothing to do
}

const char *get_symbol(uint64_t addr) override { FATAL("Unimplemented"); }
const char *get_symbol(uint64_t addr) override {
auto find = this->function_symtab.find(addr);
if (find != this->function_symtab.end()) {
return find->second.name.c_str();
} else {
return nullptr;
}
}
};
66 changes: 41 additions & 25 deletions ipemu/csrc/spdlog_ext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,15 @@ static std::set<std::string> get_set_from_env(const char *env_name, const char d

ConsoleSink::ConsoleSink() {
whitelist = get_set_from_env("EMULATOR_LOG_WHITELIST", ',');
whitelist.insert("DPIInitCosim");
whitelist.insert("SpikeStep");
whitelist.insert("SimulationExit");
whitelist.insert("DPIPeekIssue");
whitelist.insert("DPIPokeInst");
if (whitelist.empty()) {
// default set of whitelist
whitelist.insert("DPIInitCosim");
whitelist.insert("SpikeStep");
whitelist.insert("FunctionCall");
whitelist.insert("SimulationExit");
whitelist.insert("DPIPeekIssue");
whitelist.insert("DPIPokeInst");
}

// putting it in JsonLogger::JsonLogger will not work. not knowing why
this->set_level(get_level_from_env("EMULATOR_CONSOLE_LOG_LEVEL", spdlog::level::info));
Expand All @@ -75,44 +79,56 @@ inline bool ConsoleSink::is_module_enabled(const std::string &module) {
void ConsoleSink::sink_it_(const spdlog::details::log_msg &msg) {
json payload = json::parse(msg.payload);

if (msg.level < this->level()) {
return;
}

// filter message matching the current level
if (msg.level == this->level()) {
if (!is_module_enabled(payload["_module"])) return;
}

fmt::text_style level_color;
switch (msg.level) {
case spdlog::level::debug:
case spdlog::level::trace:
level_color = fmt::fg(fmt::color::gray);
break;
case spdlog::level::info:
level_color = fmt::fg(fmt::color::white);
break;
case spdlog::level::warn:
level_color = fmt::fg(fmt::color::yellow);
break;
case spdlog::level::err:
level_color = fmt::fg(fmt::color::red);
case spdlog::level::debug:
case spdlog::level::trace:
level_color = fmt::fg(fmt::color::gray);
break;
case spdlog::level::info:
level_color = fmt::fg(fmt::color::white);
break;
case spdlog::level::warn:
level_color = fmt::fg(fmt::color::yellow);
break;
case spdlog::level::err:
level_color = fmt::fg(fmt::color::red);
break;
case spdlog::level::critical:
level_color = fmt::bg(fmt::color::red) | fmt::fg(fmt::color::white);
break;
default:
level_color = fmt::fg(fmt::color::white);
break;
case spdlog::level::critical:
level_color = fmt::bg(fmt::color::red) | fmt::fg(fmt::color::white);
break;
default:
level_color = fmt::fg(fmt::color::white);
break;
}

std::cerr << fmt::format("{} {}",
fmt::styled(payload["_cycle"].get<int64_t>(), level_color),
fmt::styled(payload["_module"].get<std::string>(), fmt::fg(fmt::color::violet))
);
);
if (payload.contains("_msg")) {
std::cerr << fmt::format(" {}", fmt::styled(payload["_msg"].get<std::string>(), fmt::fg(fmt::color::green)));
}
if (payload.contains("_with")) {
std::cerr << fmt::format(" {}", fmt::styled(payload["_with"].dump(), fmt::fg(fmt::color::gray)));
}
if (msg.level > spdlog::level::err) {
std::cerr << "\n";
const auto frames = vbridge_impl_instance.frames;
for (auto frame = frames.rbegin(); frame != frames.rend(); frame++) {
std::cerr << fmt::format(fmt::fg(fmt::color::gray), " call by {}(...) at {:08X}\n",
frame->func_name, frame->func_addr);
}
}
std::cerr << "\n";
}

Expand Down Expand Up @@ -160,7 +176,7 @@ JsonLogger::JsonLogger(bool no_logging, bool no_file_logging, bool no_console_lo
}
}

JsonLogger::JsonLogger(): do_logging(false) { }
// Default constructor: builds a logger with do_logging set to false.
JsonLogger::JsonLogger() : do_logging(false) {}

// We can only implement a class method with template inside the class
// declaration
Expand Down
61 changes: 58 additions & 3 deletions ipemu/csrc/vbridge_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void VBridgeImpl::dpiInitCosim() {
proc.get_state()->sstatus->write(proc.get_state()->sstatus->read() |
SSTATUS_VS | SSTATUS_FS);

auto load_result = sim.load_elf(bin);
auto load_result = sim.load_elf32_little_endian(bin);

proc.get_state()->pc = load_result.entry_addr;

Expand Down Expand Up @@ -369,6 +369,7 @@ std::optional<SpikeEvent> VBridgeImpl::spike_step() {

clear_state(proc);

reg_t old_pc = state->pc;
reg_t new_pc;
if (event) {
auto &se = event.value();
Expand All @@ -386,13 +387,67 @@ std::optional<SpikeEvent> VBridgeImpl::spike_step() {
new_pc = fetch.func(&proc, fetch.insn, state->pc);
se.log_arch_changes();
} else {
auto disasm = proc.get_disassembler()->disassemble(fetch.insn);
Log("SpikeStep")
.with("pc", fmt::format("{:08X}", state->pc))
.with("bits", fmt::format("{:08X}", fetch.insn.bits()))
.with("disasm", proc.get_disassembler()->disassemble(fetch.insn))
.with("disasm", disasm)
.with("spike_cycles", spike_cycles)
.info("spike run scalar insn");
new_pc = fetch.func(&proc, fetch.insn, state->pc);

if (disasm == "ret") {
// When a function call is at the end of some parent function, the compiler may omit the save-ra process
// In this case we need to pop more than one frames when the child function returns
// Here we traverse the frames from top to bottom, until find a frame of the corresponding return_address
int layers_to_pop = 1;
for (; layers_to_pop <= frames.size(); layers_to_pop++) {
const auto &frame = frames[frames.size() - layers_to_pop];
if (frame.return_addr == new_pc) {
Log("FunctionCall")
.with("old_pc", fmt::format("{:08X}", old_pc))
.with("new_pc", fmt::format("{:08X}", new_pc))
.with("spike_cycles", spike_cycles)
.with("depth", frames.size())
.with("depth after return", frames.size() - layers_to_pop)
.info("return");
break;
}
}

if (layers_to_pop > frames.size()) {
// sometimes `ret` is used in inner-function jumping, in this case we cannot find corresponding frame
Log("FunctionCall")
.with("old_pc", fmt::format("{:08X}", old_pc))
.with("new_pc", fmt::format("{:08X}", new_pc))
.with("spike_cycles", spike_cycles)
.with("depth", frames.size())
.warn("cannot find the frame to return");
} else for (int j = 0; j < layers_to_pop; j++) {
frames.pop_back();
}
}
}

if (new_pc - state->pc != 2 && new_pc - state->pc != 4) {
auto sym_find = sim.get_symbol(new_pc);
if (sym_find != nullptr) {
reg_t return_addr = state->XPR[1];

// handle the case with omitted save-ra, in this case return_addr is set to null since it cannot be returned to
if (return_addr - old_pc != 2 && return_addr - old_pc != 4) {
return_addr = 0;
}
Log("FunctionCall")
.with("func_name", sym_find)
.with("old_pc", fmt::format("{:08X}", old_pc))
.with("new_pc", fmt::format("{:08X}", new_pc))
.with("return_addr", fmt::format("{:08X}", return_addr))
.with("spike_cycles", spike_cycles)
.with("depth", frames.size())
.info("call");
frames.emplace_back(CallFrame{sym_find, new_pc, return_addr, spike_cycles});
}
}

// Bypass CSR insns commitlog stuff.
Expand Down Expand Up @@ -497,7 +552,7 @@ void VBridgeImpl::receive_tl_req(const VTlInterface &tl) {
Log("ReceiveTLReq")
.with("addr", fmt::format("{:08X}", addr))
.with("insn", se->jsonify_insn())
.warn("send falsy data 0xDE for accessing unexpected memory");
.info("send falsy data 0xDE for accessing unexpected memory");
actual_data[offset] = 0xDE; // falsy data
}
}
Expand Down
Loading