Skip to content

Commit

Permalink
Speed-up absl::Symbolize by ~6x via faster file reads.
Browse files Browse the repository at this point in the history
absl::Symbolize does tons of tiny reads. Speed this up by switching
from lseek+read to a pread, and by reading more data than requested
into a buffer.

A faster absl::Symbolize will be helpful in tests and when printing
stack traces on /threadz etc.

Results for absl::Symbolize benchmark that exercises uncached behavior
of absl::Symbolize:

```
name          old time/op  new time/op  delta
BM_Symbolize  16.4ms ±12%   2.6ms ± 0%  -84.06%  (p=0.001 n=5+9)
```

PiperOrigin-RevId: 582687566
Change-Id: I44caf189d81867f3fd8c050a3100a4b9a8e744d7
  • Loading branch information
Abseil Team authored and copybara-github committed Nov 15, 2023
1 parent 61ea5d2 commit aa14601
Showing 1 changed file with 108 additions and 44 deletions.
152 changes: 108 additions & 44 deletions absl/debugging/symbolize_elf.inc
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,30 @@ ObjFile *AddrMap::Add() {
return new (&obj_[size_++]) ObjFile;
}

// Reads from a file descriptor through a caller-supplied cache buffer.
// The buffer is not owned by this class; only the pointer and size are stored.
class CachingFile {
 public:
  // Creates a reader for "fd" that caches file contents in
  // buf[0, buf_size-1]. The cache starts out empty.
  CachingFile(int fd, char *buf, size_t buf_size)
      : fd_(fd), cache_(buf), cache_size_(buf_size) {}

  // Returns the file descriptor passed to the constructor.
  int fd() const { return fd_; }

  // Reads up to "count" bytes starting at "offset" into "buf"; returns the
  // number of bytes read, or -1 on failure.
  ssize_t ReadFromOffset(void *buf, size_t count, off_t offset);

  // Like ReadFromOffset(), but returns true only if exactly "count" bytes
  // were read.
  bool ReadFromOffsetExact(void *buf, size_t count, off_t offset);

 private:
  int fd_;
  char *cache_;
  size_t cache_size_;

  // File bytes [cache_start_, cache_limit_-1] of fd_ currently occupy a
  // prefix of cache_[0, cache_size_-1]. Equal bounds mean an empty cache.
  off_t cache_start_ = 0;
  off_t cache_limit_ = 0;
};

// ---------------------------------------------------------------

enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND };
Expand Down Expand Up @@ -330,6 +354,7 @@ class Symbolizer {
SYMBOL_BUF_SIZE = 3072,
TMP_BUF_SIZE = 1024,
SYMBOL_CACHE_LINES = 128,
FILE_CACHE_SIZE = 8192,
};

AddrMap addr_map_;
Expand All @@ -338,6 +363,7 @@ class Symbolizer {
bool addr_map_read_;

char symbol_buf_[SYMBOL_BUF_SIZE];
char file_cache_[FILE_CACHE_SIZE];

// tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym)
// so we ensure that tmp_buf_ is properly aligned to store either.
Expand Down Expand Up @@ -436,34 +462,58 @@ static ssize_t ReadPersistent(int fd, void *buf, size_t count) {
return static_cast<ssize_t>(num_bytes);
}

// Read up to "count" bytes from "offset" in the file pointed by file
// descriptor "fd" into the buffer starting at "buf". On success,
// return the number of bytes read. Otherwise, return -1.
static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count,
const off_t offset) {
off_t off = lseek(fd, offset, SEEK_SET);
if (off == (off_t)-1) {
ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd,
static_cast<intmax_t>(offset), errno);
return -1;
// Read up to "count" bytes from "offset" into the buffer starting at "buf",
// while handling short reads and EINTR. On success, return the number of bytes
// read. Otherwise, return -1.
//
// Bytes are copied out of cache_ whenever "offset" lies inside
// [cache_start_, cache_limit_). On a miss the cache is refilled with up to
// cache_size_ bytes via pread() starting at "offset", and the next loop
// iteration copies from the refilled cache. The result is smaller than
// "count" only when pread() returns 0, which is treated as EOF. A pread()
// failure other than EINTR returns -1 even if some bytes were already copied.
ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) {
char *dst = static_cast<char *>(buf);
// Number of bytes copied into "buf" so far.
size_t read = 0;
while (read < count) {
// Look in cache first.
if (offset >= cache_start_ && offset < cache_limit_) {
const char *hit_start = &cache_[offset - cache_start_];
// Copy the smaller of what the caller still needs and what the cache
// holds from "offset" up to cache_limit_.
const size_t n =
std::min(count - read, static_cast<size_t>(cache_limit_ - offset));
memcpy(dst, hit_start, n);
dst += n;
read += static_cast<size_t>(n);
offset += static_cast<off_t>(n);
continue;
}

// Cache miss. The pread() below overwrites cache_, so the old bounds no
// longer describe its contents; the empty range [0, 0) matches no offset.
cache_start_ = 0;
cache_limit_ = 0;
ssize_t n = pread(fd_, cache_, cache_size_, offset);
if (n < 0) {
if (errno == EINTR) {
// Interrupted by a signal: retry the same offset on the next iteration.
continue;
}
ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno);
return -1;
}
if (n == 0) { // Reached EOF.
break;
}

// Record which file range the cache now holds.
cache_start_ = offset;
cache_limit_ = offset + static_cast<off_t>(n);
// Next iteration will copy from cache into dst.
}
// NOTE(review): the next line looks like a removed line from the pre-change
// free function ReadFromOffset(fd, ...), shown here without a '-' marker;
// `fd` is not a name in this member function. Confirm against the real file.
return ReadPersistent(fd, buf, count);
return static_cast<ssize_t>(read);
}

// Try reading exactly "count" bytes from "offset" bytes in a file
// pointed by "fd" into the buffer starting at "buf" while handling
// short reads and EINTR. On success, return true. Otherwise, return
// false.
static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count,
const off_t offset) {
ssize_t len = ReadFromOffset(fd, buf, count, offset);
// Attempts to read exactly "count" bytes starting at file offset "offset"
// into the buffer at "buf", with short reads and EINTR handled by
// ReadFromOffset(). Returns true only if all "count" bytes were read;
// returns false on a read error or if fewer bytes were available.
bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) {
  const ssize_t bytes_read = ReadFromOffset(buf, count, offset);
  if (bytes_read < 0) {
    return false;
  }
  return static_cast<size_t>(bytes_read) == count;
}

// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
static int FileGetElfType(const int fd) {
static int FileGetElfType(CachingFile *file) {
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
if (!file->ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return -1;
}
if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
Expand All @@ -478,8 +528,8 @@ static int FileGetElfType(const int fd) {
// To keep stack consumption low, we would like this function to not get
// inlined.
static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type,
ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) {
CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset,
ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) {
ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf);
const size_t buf_entries = tmp_buf_size / sizeof(buf[0]);
const size_t buf_bytes = buf_entries * sizeof(buf[0]);
Expand All @@ -490,7 +540,7 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
const size_t num_bytes_to_read =
(buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes;
const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0]));
const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset);
const ssize_t len = file->ReadFromOffset(buf, num_bytes_to_read, offset);
if (len < 0) {
ABSL_RAW_LOG(
WARNING,
Expand Down Expand Up @@ -524,11 +574,17 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
// but there has (as yet) been no need for anything longer either.
const int kMaxSectionNameLen = 64;

// Small cache to use for miscellaneous file reads: the size, in bytes, of the
// on-stack buffers that ForEachSection, GetSectionHeaderByName and
// MaybeInitializeObjFile hand to CachingFile.
const int kSmallFileCacheSize = 100;

bool ForEachSection(int fd,
const std::function<bool(absl::string_view name,
const ElfW(Shdr) &)> &callback) {
char buf[kSmallFileCacheSize];
CachingFile file(fd, buf, sizeof(buf));

ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false;
}

Expand All @@ -540,21 +596,21 @@ bool ForEachSection(int fd,
ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx;
if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false;
}

for (int i = 0; i < elf_header.e_shnum; ++i) {
ElfW(Shdr) out;
off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) {
if (!file.ReadFromOffsetExact(&out, sizeof(out), section_header_offset)) {
return false;
}
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name;
char header_name[kMaxSectionNameLen];
ssize_t n_read =
ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset);
file.ReadFromOffset(&header_name, kMaxSectionNameLen, name_offset);
if (n_read < 0) {
return false;
} else if (n_read > kMaxSectionNameLen) {
Expand Down Expand Up @@ -584,8 +640,10 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
return false;
}

char buf[kSmallFileCacheSize];
CachingFile file(fd, buf, sizeof(buf));
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false;
}

Expand All @@ -597,18 +655,18 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx;
if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false;
}

for (int i = 0; i < elf_header.e_shnum; ++i) {
off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
if (!file.ReadFromOffsetExact(out, sizeof(*out), section_header_offset)) {
return false;
}
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name;
ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
ssize_t n_read = file.ReadFromOffset(&header_name, name_len, name_offset);
if (n_read < 0) {
return false;
} else if (static_cast<size_t>(n_read) != name_len) {
Expand Down Expand Up @@ -683,7 +741,7 @@ static const char *ComputeOffset(const char *base, ptrdiff_t offset) {
// To keep stack consumption low, we would like this function to not get
// inlined.
static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
const void *const pc, const int fd, char *out, size_t out_size,
const void *const pc, CachingFile *file, char *out, size_t out_size,
ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab,
const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) {
if (symtab == nullptr) {
Expand Down Expand Up @@ -716,7 +774,7 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
const size_t entries_in_chunk =
std::min(num_remaining_symbols, buf_entries);
const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]);
const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset);
const ssize_t len = file->ReadFromOffset(buf, bytes_in_chunk, offset);
SAFE_ASSERT(len >= 0);
SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0);
const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]);
Expand Down Expand Up @@ -772,12 +830,12 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
if (found_match) {
const off_t off =
static_cast<off_t>(strtab->sh_offset) + best_match.st_name;
const ssize_t n_read = ReadFromOffset(fd, out, out_size, off);
const ssize_t n_read = file->ReadFromOffset(out, out_size, off);
if (n_read <= 0) {
// This should never happen.
ABSL_RAW_LOG(WARNING,
"Unable to read from fd %d at offset %lld: n_read = %zd", fd,
static_cast<long long>(off), n_read);
"Unable to read from fd %d at offset %lld: n_read = %zd",
file->fd(), static_cast<long long>(off), n_read);
return SYMBOL_NOT_FOUND;
}
ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size,
Expand Down Expand Up @@ -827,22 +885,24 @@ FindSymbolResult Symbolizer::GetSymbolFromObjectFile(
}
}

CachingFile file(obj.fd, file_cache_, sizeof(file_cache_));

// Consult a regular symbol table, then fall back to the dynamic symbol table.
for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) {
if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum,
if (!GetSectionHeaderByType(&file, obj.elf_header.e_shnum,
static_cast<off_t>(obj.elf_header.e_shoff),
static_cast<ElfW(Word)>(symbol_table_type),
&symtab, tmp_buf, tmp_buf_size)) {
continue;
}
if (!ReadFromOffsetExact(
obj.fd, &strtab, sizeof(strtab),
if (!file.ReadFromOffsetExact(
&strtab, sizeof(strtab),
static_cast<off_t>(obj.elf_header.e_shoff +
symtab.sh_link * sizeof(symtab)))) {
continue;
}
const FindSymbolResult rc =
FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab,
FindSymbol(pc, &file, out, out_size, relocation, &strtab, &symtab,
opd_ptr, tmp_buf, tmp_buf_size);
if (rc != SYMBOL_NOT_FOUND) {
return rc;
Expand Down Expand Up @@ -1323,15 +1383,19 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno);
return false;
}
obj->elf_type = FileGetElfType(obj->fd);

char buf[kSmallFileCacheSize];
CachingFile file(obj->fd, buf, sizeof(buf));

obj->elf_type = FileGetElfType(&file);
if (obj->elf_type < 0) {
ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename,
obj->elf_type);
return false;
}

if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header),
0)) {
if (!file.ReadFromOffsetExact(&obj->elf_header, sizeof(obj->elf_header),
0)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename);
return false;
}
Expand All @@ -1341,7 +1405,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
size_t num_interesting_load_segments = 0;
for (int j = 0; j < phnum; j++) {
ElfW(Phdr) phdr;
if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) {
if (!file.ReadFromOffsetExact(&phdr, sizeof(phdr), phoff)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d",
obj->filename, j);
return false;
Expand Down

0 comments on commit aa14601

Please sign in to comment.