Skip to content

Commit

Permalink
Revert "Support for DWARFv5 embedded source code extension (#849)" (#870
Browse files Browse the repository at this point in the history
)

This reverts the "feature part" of commit 7dc28dd (#849). The dep updates can stay.

Unfortunately opening a debug session to check for the presence of sources is prohibitively expensive in terms of memory (going from ~450MB to ~3.7GB in a Python process that calls Object.features on an example 1.3GB file). In order to support this we'll have to find a more parsimonious way to check if a file contains sources.
  • Loading branch information
loewenheim authored Oct 1, 2024
1 parent d845f97 commit f9b7fcc
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 108 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

# Unreleased

**Fixes**
- Unship "Support for DWARFv5 embedded source code extension ([#849](https://github.com/getsentry/symbolic/pull/849))".
Unfortunately the check for whether an elf file contains embedded sources is prohibitively expensive in terms of memory.
([#870](https://github.com/getsentry/symbolic/pull/870))

## 12.11.1

**Fixes**
Expand Down
22 changes: 1 addition & 21 deletions symbolic-debuginfo/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,25 +450,13 @@ pub struct FileInfo<'data> {
name: Cow<'data, [u8]>,
/// Path to the file.
dir: Cow<'data, [u8]>,
/// Source code
source: Option<Cow<'data, [u8]>>,
}

impl<'data> FileInfo<'data> {
/// Creates a `FileInfo` with a given directory and the file name.
#[cfg(feature = "dwarf")]
pub fn new(dir: Cow<'data, [u8]>, name: Cow<'data, [u8]>) -> Self {
Self::with_source(dir, name, None)
}

/// Creates a `FileInfo` with a given directory, the file name, and optional source code.
#[cfg(feature = "dwarf")]
pub fn with_source(
dir: Cow<'data, [u8]>,
name: Cow<'data, [u8]>,
source: Option<Cow<'data, [u8]>>,
) -> Self {
FileInfo { name, dir, source }
FileInfo { name, dir }
}

/// Creates a `FileInfo` from a joined path by trying to split it.
Expand All @@ -482,7 +470,6 @@ impl<'data> FileInfo<'data> {
Some(dir) => Cow::Borrowed(dir),
None => Cow::default(),
},
source: None,
}
}

Expand All @@ -498,7 +485,6 @@ impl<'data> FileInfo<'data> {
Some(dir) => Cow::Owned(dir.to_vec()),
None => Cow::default(),
},
source: None,
}
}

Expand All @@ -507,7 +493,6 @@ impl<'data> FileInfo<'data> {
FileInfo {
name: Cow::Borrowed(name),
dir: Cow::default(),
source: None,
}
}

Expand All @@ -521,11 +506,6 @@ impl<'data> FileInfo<'data> {
from_utf8_cow_lossy(&self.dir)
}

/// The embedded source code as an UTF-8 string.
pub fn source_str(&self) -> Option<Cow<'data, str>> {
self.source.as_ref().map(from_utf8_cow_lossy)
}

/// The full path to the file, relative to the compilation directory.
pub fn path_str(&self) -> String {
let joined = join_path(&self.dir_str(), &self.name_str());
Expand Down
42 changes: 4 additions & 38 deletions symbolic-debuginfo/src/dwarf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
//! [`PeObject`]: ../pe/struct.PeObject.html
use std::borrow::Cow;
use std::collections::{BTreeSet, HashMap};
use std::collections::BTreeSet;
use std::error::Error;
use std::fmt;
use std::marker::PhantomData;
Expand Down Expand Up @@ -692,7 +692,7 @@ impl<'d, 'a> DwarfUnit<'d, 'a> {
line_program: &LineNumberProgramHeader<'d>,
file: &LineProgramFileEntry<'d>,
) -> FileInfo<'d> {
FileInfo::with_source(
FileInfo::new(
Cow::Borrowed(resolve_byte_name(
self.bcsymbolmap,
file.directory(line_program)
Expand All @@ -703,14 +703,6 @@ impl<'d, 'a> DwarfUnit<'d, 'a> {
self.bcsymbolmap,
self.inner.slice_value(file.path_name()).unwrap_or_default(),
)),
file.source().and_then(|source| {
let unit_ref = self.inner.unit.unit_ref(self.inner.info);
match unit_ref.attr_string(source) {
Ok(source) if source.is_empty() => None,
Err(_) => None,
Ok(source) => Some(Cow::Borrowed(source.slice())),
}
}),
)
}

Expand Down Expand Up @@ -1303,9 +1295,6 @@ impl std::iter::FusedIterator for DwarfUnitIterator<'_> {}
pub struct DwarfDebugSession<'data> {
cell: SelfCell<Box<DwarfSections<'data>>, DwarfInfo<'data>>,
bcsymbolmap: Option<Arc<BcSymbolMap<'data>>>,
// We store the "lookup path" for each entry here to avoid lifetime issues w.r.t
// HashMap<_, SourceFileDescriptor<'data>>, as we can't construct this in a method call.
sources_path_to_file_idx: OnceCell<HashMap<String, usize>>,
}

impl<'data> DwarfDebugSession<'data> {
Expand All @@ -1327,7 +1316,6 @@ impl<'data> DwarfDebugSession<'data> {
Ok(DwarfDebugSession {
cell,
bcsymbolmap: None,
sources_path_to_file_idx: OnceCell::default(),
})
}

Expand Down Expand Up @@ -1360,33 +1348,11 @@ impl<'data> DwarfDebugSession<'data> {
}

/// See [DebugSession::source_by_path] for more information.
/// This lookup returns entries that match a given [FileEntry::abs_path_str].
///
/// Note that this does not load additional sources from disk and only works with sources
/// embedded directly in the debug information (DW_LNCT_LLVM_source).
pub fn source_by_path(
&self,
path: &str,
_path: &str,
) -> Result<Option<SourceFileDescriptor<'_>>, DwarfError> {
// Construct / fetch a lookup table to avoid scanning and comparing each file's path in this
// operation:
let sources = self.sources_path_to_file_idx.get_or_init(|| {
let mut res = HashMap::new();
for (i, file) in self.files().enumerate() {
if let Ok(file) = file {
if file.source_str().is_some() {
res.insert(file.abs_path_str(), i);
}
}
}
res
});

Ok(sources.get(path).map(|&idx| {
// These unwraps hold by construction above
let file = self.files().nth(idx).unwrap().unwrap();
SourceFileDescriptor::new_embedded(file.source_str().unwrap(), None)
}))
Ok(None)
}
}

Expand Down
9 changes: 1 addition & 8 deletions symbolic-debuginfo/src/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,14 +537,7 @@ impl<'data> ElfObject<'data> {

/// Determines whether this object contains embedded source.
pub fn has_sources(&self) -> bool {
// Note: It'd be great to be able to determine this without iterating over all file entries.
// Unfortunately, though, this seems to be the only correct implementation.
match self.debug_session() {
Ok(session) => session
.files()
.any(|f| f.is_ok_and(|f| f.source_str().is_some())),
Err(_) => false,
}
false
}

/// Determines whether this object is malformed and was only partially parsed
Expand Down
7 changes: 1 addition & 6 deletions symbolic-debuginfo/src/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,7 @@ impl<'data> WasmObject<'data> {

/// Determines whether this object contains embedded source.
pub fn has_sources(&self) -> bool {
match self.debug_session() {
Ok(session) => session
.files()
.any(|f| f.is_ok_and(|f| f.source_str().is_some())),
Err(_) => false,
}
false
}

/// Determines whether this object is malformed and was only partially parsed
Expand Down
36 changes: 1 addition & 35 deletions symbolic-debuginfo/tests/test_objects.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, env, ffi::CString, fmt, io::BufWriter};
use std::{env, ffi::CString, fmt, io::BufWriter};

use symbolic_common::ByteView;
use symbolic_debuginfo::{
Expand Down Expand Up @@ -871,8 +871,6 @@ fn test_wasm_symbols() -> Result<(), Error> {
Some("bda18fd85d4a4eb893022d6bfad846b1".into())
);

assert!(!object.has_sources());

let symbols = object.symbol_map();
insta::assert_debug_snapshot!("wasm_symbols", SymbolsDebug(&symbols));

Expand All @@ -895,35 +893,3 @@ fn test_wasm_line_program() -> Result<(), Error> {

Ok(())
}

#[test]
fn test_wasm_has_sources() -> Result<(), Error> {
let view = ByteView::open(fixture("wasm/embed-source.wasm"))?;
let object = Object::parse(&view)?;

assert!(object.has_sources());

let session = object.debug_session()?;

let files_with_source = session
.files()
.filter_map(|x| {
let file_entry = x.ok()?;
let source = file_entry.source_str()?;
Some((file_entry.abs_path_str(), source))
})
.collect::<HashMap<_, _>>();

insta::assert_debug_snapshot!(files_with_source, @r###"
{
"/tmp/foo.c": "int main(void) { return 42; }\n",
}
"###);

assert!(session.source_by_path("foo/bar.c").unwrap().is_none());
assert!(session.source_by_path("foo.c").unwrap().is_none());
// source_by_path matches the absolute file path
assert!(session.source_by_path("/tmp/foo.c").unwrap().is_some());

Ok(())
}
Binary file removed symbolic-testutils/fixtures/wasm/embed-source.wasm
Binary file not shown.

0 comments on commit f9b7fcc

Please sign in to comment.