diff --git a/src/stirling/binaries/BUILD.bazel b/src/stirling/binaries/BUILD.bazel
index f9b97bef9aa..9b6954b6b46 100644
--- a/src/stirling/binaries/BUILD.bazel
+++ b/src/stirling/binaries/BUILD.bazel
@@ -76,6 +76,14 @@ pl_cc_binary(
     ],
 )
 
+pl_cc_binary(
+    name = "go_binary_parse_profiling",
+    srcs = ["go_binary_parse_profiling.cc"],
+    deps = [
+        "//src/stirling:cc_library",
+    ],
+)
+
 cc_image(
     name = "stirling_dt_image",
     base = ":stirling_binary_base_image",
diff --git a/src/stirling/binaries/go_binary_parse_profiling.cc b/src/stirling/binaries/go_binary_parse_profiling.cc
new file mode 100644
index 00000000000..a7476ab62e5
--- /dev/null
+++ b/src/stirling/binaries/go_binary_parse_profiling.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <memory>
+#include <string>
+
+#include "src/common/base/base.h"
+#include "src/common/base/env.h"
+#include "src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.h"
+
+using px::StatusOr;
+using px::stirling::PopulateGoTLSDebugSymbols;
+using px::stirling::obj_tools::DwarfReader;
+using px::stirling::obj_tools::ElfReader;
+
+//-----------------------------------------------------------------------------
+// This utility isolates parsing the debug symbols of a Go binary, which also verifies that the
+// Go version detection code is functioning. It is useful for debugging when the Go ELF/DWARF
+// parsing is not working correctly, which has been the source of a few PEM crashes (gh#1300,
+// gh#1646). It makes it easy to ask end users to run the parser against their own binaries when
+// those binaries are sensitive (proprietary) and we can't debug them ourselves.
+//-----------------------------------------------------------------------------
+
+int main(int argc, char** argv) {
+  px::EnvironmentGuard env_guard(&argc, argv);
+
+  if (argc < 2) {
+    LOG(FATAL) << absl::Substitute("Expected binary argument to be provided. Instead received $0",
+                                   *argv);
+  }
+
+  const std::string binary(argv[1]);
+
+  // Without a readable ELF file there is nothing to analyze: report it and exit with a failure.
+  StatusOr<std::unique_ptr<ElfReader>> elf_reader_status = ElfReader::Create(binary);
+  if (!elf_reader_status.ok()) {
+    LOG(ERROR) << absl::Substitute(
+        "Failed to parse elf binary $0 with "
+        "Message = $1",
+        binary, elf_reader_status.msg());
+    return 1;
+  }
+  std::unique_ptr<ElfReader> elf_reader = elf_reader_status.ConsumeValueOrDie();
+
+  // The DWARF reader is handed to the symbol lookup below, so failing to create it is fatal too.
+  StatusOr<std::unique_ptr<DwarfReader>> dwarf_reader_status =
+      DwarfReader::CreateIndexingAll(binary);
+  if (!dwarf_reader_status.ok()) {
+    LOG(ERROR) << absl::Substitute(
+        "Failed to get binary $0 debug symbols. "
+        "Message = $1",
+        binary, dwarf_reader_status.msg());
+    return 1;
+  }
+  std::unique_ptr<DwarfReader> dwarf_reader = dwarf_reader_status.ConsumeValueOrDie();
+
+  struct go_tls_symaddrs_t symaddrs;
+  auto status = PopulateGoTLSDebugSymbols(elf_reader.get(), dwarf_reader.get(), &symaddrs);
+
+  if (!status.ok()) {
+    LOG(ERROR) << absl::Substitute("debug symbol parsing failed with: $0", status.msg());
+    return 1;
+  }
+  return 0;
+}
diff --git a/src/stirling/obj_tools/elf_reader.cc b/src/stirling/obj_tools/elf_reader.cc
index 4aec43d9c8d..9b497707f9a 100644
--- a/src/stirling/obj_tools/elf_reader.cc
+++ b/src/stirling/obj_tools/elf_reader.cc
@@ -605,6 +605,21 @@ StatusOr<ELFIO::section*> ElfReader::SectionWithName(std::string_view section_na
   return error::NotFound("Could not find section=$0 in binary=$1", section_name, binary_path_);
 }
 
+StatusOr<uint64_t> ElfReader::VirtualAddrToBinaryAddr(uint64_t virtual_addr) {
+  // Find the segment whose file-backed range contains the address, then translate the address
+  // into that segment's file offset.
+  for (int i = 0; i < elf_reader_.segments.size(); i++) {
+    ELFIO::segment* segment = elf_reader_.segments[i];
+    const uint64_t segment_vaddr = segment->get_virtual_address();
+    const uint64_t segment_file_size = segment->get_file_size();
+    // Compare distances rather than computing segment_vaddr + size, which could wrap around.
+    if (virtual_addr >= segment_vaddr && virtual_addr - segment_vaddr < segment_file_size) {
+      return virtual_addr - segment_vaddr + segment->get_offset();
+    }
+  }
+  return error::Internal("Could not find binary address for virtual address=$0", virtual_addr);
+}
+
 StatusOr<utils::u8string> ElfReader::SymbolByteCode(std::string_view section,
                                                      const SymbolInfo& symbol) {
   PX_ASSIGN_OR_RETURN(ELFIO::section * text_section, SectionWithName(section));
diff --git a/src/stirling/obj_tools/elf_reader.h b/src/stirling/obj_tools/elf_reader.h
index cd1502b79a4..d492122a26e 100644
--- a/src/stirling/obj_tools/elf_reader.h
+++ b/src/stirling/obj_tools/elf_reader.h
@@ -160,6 +160,18 @@ class ElfReader {
    */
   StatusOr<utils::u8string> SymbolByteCode(std::string_view section, const SymbolInfo& symbol);
 
+  /**
+   * Returns the binary address that corresponds to the given virtual address.
+   * The address is interpreted as it appears in the ELF file (no ASLR adjustment), since the
+   * translation is based entirely on the ELF file's segment information. Given this, most of the
+   * time ElfAddressConverter::VirtualAddrToBinaryAddr is a more appropriate utility to use.
+   *
+   * Certain use cases may require this function, such as when the Go toolchain embeds virtual
+   * addresses within a binary and they must be parsed (see ReadGoBuildVersion and ReadGoString in
+   * go_syms.cc). Returns an error if no segment's file-backed range contains the address.
+   */
+  StatusOr<uint64_t> VirtualAddrToBinaryAddr(uint64_t virtual_addr);
+
   /**
    * Returns the virtual address in the ELF file of offset 0x0. Calculated by finding the first
    * loadable segment and returning its virtual address minus its file offset.
diff --git a/src/stirling/obj_tools/elf_reader_test.cc b/src/stirling/obj_tools/elf_reader_test.cc
index c4390806892..a2c9a56a98b 100644
--- a/src/stirling/obj_tools/elf_reader_test.cc
+++ b/src/stirling/obj_tools/elf_reader_test.cc
@@ -40,6 +40,55 @@ using ::testing::UnorderedElementsAre;
 
 using ::px::operator<<;
 
+// Models ELF section output information from objdump -h.
+// ELFIO::section's do not contain virtual memory addresses like ELFIO::segment's do
+// so this struct is used to store the information from objdump.
+// Example objdump output:
+//
+// $ objdump -j .bss -h bazel-bin/src/stirling/obj_tools/testdata/cc/test_exe/test_exe
+//
+// bazel-bin/src/stirling/obj_tools/testdata/cc/test_exe/test_exe:     file format elf64-x86-64
+//
+// Sections:
+// Idx Name          Size      VMA               LMA               File off  Algn
+//  27 .bss          00002068  00000000000bd100  00000000000bd100  000ba100  2**5
+//                   ALLOC
+struct Section {
+  std::string name;
+  int64_t size = 0;
+  int64_t vma = 0;
+  int64_t lma = 0;
+  int64_t file_offset = 0;
+};
+
+// TODO(ddelnano): Make this function hermetic by providing the objdump output via bazel
+StatusOr<Section> ObjdumpSectionNameToAddr(const std::string& path,
+                                           const std::string& section_name) {
+  PX_ASSIGN_OR_RETURN(const std::string objdump_out,
+                      px::Exec(absl::StrCat("objdump -h -j ", section_name, " ", path)));
+  const std::vector<std::string_view> objdump_out_lines = absl::StrSplit(objdump_out, '\n');
+  for (const auto& line : objdump_out_lines) {
+    const std::vector<std::string_view> fields =
+        absl::StrSplit(line, ' ', absl::SkipWhitespace());
+    // A section row reads "Idx Name Size VMA LMA File-off Algn". Skip everything else, such as
+    // the banner line, whose file path could happen to contain the section name.
+    if (fields.size() < 6 || fields[1] != section_name) {
+      continue;
+    }
+
+    // objdump prints the numeric columns in hexadecimal.
+    Section section;
+    section.name = std::string(fields[1]);
+    section.size = std::stoll(std::string(fields[2]), nullptr, 16);
+    section.vma = std::stoll(std::string(fields[3]), nullptr, 16);
+    section.lma = std::stoll(std::string(fields[4]), nullptr, 16);
+    section.file_offset = std::stoll(std::string(fields[5]), nullptr, 16);
+    return section;
+  }
+
+  return error::Internal("Unable to find section with name $0", section_name);
+}
+
 StatusOr<int64_t> NmSymbolNameToAddr(const std::string& path, const std::string& symbol_name) {
   // Extract the address from nm as the gold standard.
int64_t symbol_addr = -1; @@ -133,6 +182,17 @@ TEST(ElfReaderTest, SymbolAddress) { } } +TEST(ElfReaderTest, VirtualAddrToBinaryAddr) { + const std::string path = kTestExeFixture.Path().string(); + const std::string kDataSection = ".data"; + ASSERT_OK_AND_ASSIGN(const Section section, ObjdumpSectionNameToAddr(path, kDataSection)); + + ASSERT_OK_AND_ASSIGN(std::unique_ptr elf_reader, ElfReader::Create(path)); + const int64_t offset = 1; + ASSERT_OK_AND_ASSIGN(auto binary_addr, elf_reader->VirtualAddrToBinaryAddr(section.vma + offset)); + EXPECT_EQ(binary_addr, section.file_offset + offset); +} + TEST(ElfReaderTest, AddrToSymbol) { const std::string path = kTestExeFixture.Path().string(); const std::string kSymbolName = "CanYouFindThis"; diff --git a/src/stirling/obj_tools/go_syms.cc b/src/stirling/obj_tools/go_syms.cc index e038be4f213..c828ebc0e0a 100644 --- a/src/stirling/obj_tools/go_syms.cc +++ b/src/stirling/obj_tools/go_syms.cc @@ -55,15 +55,14 @@ std::string_view kGoBuildInfoMagic = // Reads a Go string encoded within a buildinfo header. 
This function is meant to provide the same // functionality as -// https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L244C37-L244C44 +// https://github.com/golang/go/blob/aa97a012b4be393c1725c16a78b92dea81632378/src/debug/buildinfo/buildinfo.go#L282 StatusOr ReadGoString(ElfReader* elf_reader, uint64_t ptr_size, uint64_t ptr_addr, read_ptr_func_t read_ptr) { PX_ASSIGN_OR_RETURN(u8string_view data_addr, elf_reader->BinaryByteCode(ptr_addr, ptr_size)); PX_ASSIGN_OR_RETURN(u8string_view data_len, elf_reader->BinaryByteCode(ptr_addr + ptr_size, ptr_size)); - PX_ASSIGN_OR_RETURN(uint64_t vaddr_offset, elf_reader->GetVirtualAddrAtOffsetZero()); - ptr_addr = read_ptr(data_addr) - vaddr_offset; + PX_ASSIGN_OR_RETURN(ptr_addr, elf_reader->VirtualAddrToBinaryAddr(read_ptr(data_addr))); uint64_t str_length = read_ptr(data_len); PX_ASSIGN_OR_RETURN(std::string_view go_version_bytecode, @@ -136,10 +135,11 @@ StatusOr ReadGoBuildVersion(ElfReader* elf_reader) { } } - PX_ASSIGN_OR_RETURN(uint64_t vaddr_offset, elf_reader->GetVirtualAddrAtOffsetZero()); - - PX_ASSIGN_OR_RETURN(auto s, binary_decoder.ExtractString(ptr_size)); - uint64_t ptr_addr = read_ptr(s) - vaddr_offset; + // Reads the virtual address location of the runtime.buildVersion symbol. 
+ PX_ASSIGN_OR_RETURN(auto runtime_version_vaddr, + binary_decoder.ExtractString(ptr_size)); + PX_ASSIGN_OR_RETURN(uint64_t ptr_addr, + elf_reader->VirtualAddrToBinaryAddr(read_ptr(runtime_version_vaddr))); return ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr); } diff --git a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.cc b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.cc index 6629df47881..b9cdac27d68 100644 --- a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.cc +++ b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.cc @@ -464,6 +464,8 @@ Status PopulateHTTP2DebugSymbols(DwarfReader* dwarf_reader, std::string_view ven return Status::OK(); } +} // namespace + Status PopulateGoTLSDebugSymbols(ElfReader* elf_reader, DwarfReader* dwarf_reader, struct go_tls_symaddrs_t* symaddrs) { PX_ASSIGN_OR_RETURN(std::string build_version, ReadGoBuildVersion(elf_reader)); @@ -510,8 +512,6 @@ Status PopulateGoTLSDebugSymbols(ElfReader* elf_reader, DwarfReader* dwarf_reade return Status::OK(); } -} // namespace - StatusOr GoCommonSymAddrs(ElfReader* elf_reader, DwarfReader* dwarf_reader) { struct go_common_symaddrs_t symaddrs; diff --git a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.h b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.h index 4886bff1e06..3940078077c 100644 --- a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.h +++ b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs.h @@ -73,5 +73,9 @@ StatusOr OpenSSLSymAddrs(obj_tools::RawFptrManager* f StatusOr NodeTLSWrapSymAddrs(const std::filesystem::path& node_exe, const SemVer& ver); +px::Status PopulateGoTLSDebugSymbols(obj_tools::ElfReader* elf_reader, + obj_tools::DwarfReader* dwarf_reader, + struct go_tls_symaddrs_t* symaddrs); + } // namespace stirling } // namespace px