Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: iterative DFS instead of recursion for Mach-O exports #148

Merged
merged 5 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 137 additions & 79 deletions lib/src/modules/macho/parser.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashSet;

use crate::modules::protos;
use bstr::{BStr, ByteSlice};
use itertools::Itertools;
Expand Down Expand Up @@ -917,67 +919,95 @@ impl<'a> MachOFile<'a> {

fn parse_export_node(
&mut self,
) -> impl FnMut(&'a [u8], u64, &BStr) -> IResult<&'a [u8], String> + '_
{
move |data: &'a [u8], offset: u64, prefix: &BStr| {
let (remainder, length) = uleb128(&data[offset as usize..])?;
let mut remaining_data = remainder;

if length != 0 {
let (remainder, flags) = uleb128(remaining_data)?;
match flags {
EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => {
let (remainder, _stub_offset) = uleb128(remainder)?;

let (remainder, _resolver_offset) =
uleb128(remainder)?;
remaining_data = remainder;
) -> impl FnMut(&'a [u8], u64) -> IResult<&'a [u8], usize> + '_ {
move |data: &'a [u8], offset: u64| {
let mut stack = Vec::<ExportNode>::new();
let mut visited = HashSet::<usize>::new();

stack.push(ExportNode {
offset: offset as usize,
prefix: "".to_string(),
});

while !data.is_empty()
&& (offset as usize) < data.len()
&& !stack.is_empty()
{
let export_node = stack.pop().unwrap();

// If node was already visited, continue without processing it.
if !visited.insert(export_node.offset) {
continue;
}

let node_data = match data.get(export_node.offset..) {
Some(data) => data,
None => continue,
};

let (mut remaining_data, length) = uleb128(node_data)?;

if length != 0 {
let (remainder, flags) = uleb128(remaining_data)?;
match flags {
EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => {
let (remainder, _stub_offset) =
uleb128(remainder)?;

let (remainder, _resolver_offset) =
uleb128(remainder)?;
remaining_data = remainder;
}
EXPORT_SYMBOL_FLAGS_REEXPORT => {
let (remainder, _ordinal) = uleb128(remainder)?;

let (remainder, _label) =
map(
tuple((
take_till(|b| b == b'\x00'),
tag(b"\x00"),
)),
|(s, _)| s,
)(remainder)?;

remaining_data = remainder;
}
EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION => {
let (remainder, _offset) = uleb128(remainder)?;
remaining_data = remainder;
}
_ => {}
}
EXPORT_SYMBOL_FLAGS_REEXPORT => {
let (remainder, _ordinal) = uleb128(remainder)?;
}

let (remainder, _label) = map(
let (mut edge_remainder, edges) = u8(remaining_data)?;

for _ in 0..edges {
let (remainder, strr) =
map(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| s,
)(
remainder
)?;

remaining_data = remainder;
)(edge_remainder)?;
let edge_label = BStr::new(strr);
let (remainder, edge_offset) = uleb128(remainder)?;
if let Ok(edge_label_str) = edge_label.to_str() {
stack.push(ExportNode {
offset: edge_offset as usize,
prefix: format!(
"{}{}",
export_node.prefix, edge_label_str
),
});
}
EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION => {
let (remainder, _offset) = uleb128(remainder)?;
remaining_data = remainder;
}
_ => {}
edge_remainder = remainder;
}
}

let (remainder, edges) = u8(remaining_data)?;
let mut edge_remainder = remainder;

for _ in 0..edges {
let (remainder, strr) = map(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| s,
)(edge_remainder)?;
let edge_label = BStr::new(strr);
let (remainder, edge_offset) = uleb128(remainder)?;
let (_, _) = self.parse_export_node()(
data,
edge_offset,
BStr::new(&bstr::concat([prefix, edge_label])),
)?;
edge_remainder = remainder;
}

if length != 0 {
if let Ok(prefix) = prefix.to_str() {
self.exports.push(prefix.to_string())
if length != 0 {
self.exports.push(export_node.prefix)
}
}

Ok((data, prefix.to_str().unwrap().to_string()))
Ok((data, 0))
}
}

Expand All @@ -988,8 +1018,7 @@ impl<'a> MachOFile<'a> {
) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<String>> + '_ {
move |data: &'a [u8]| {
let exports = Vec::<String>::new();
let (remainder, _) =
self.parse_export_node()(data, 0, BStr::new(""))?;
let (remainder, _) = self.parse_export_node()(data, 0)?;

Ok((remainder, exports))
}
Expand Down Expand Up @@ -1394,6 +1423,11 @@ struct MinVersion {
sdk: u32,
}

/// A node of the Mach-O export data that is pending a visit by the
/// iterative depth-first traversal performed in `parse_export_node`.
struct ExportNode {
/// Byte offset of the node, relative to the start of the export data
/// passed to the parser.
offset: usize,
/// Concatenation of the edge labels followed to reach this node. It is
/// recorded as an export name when the node has a non-zero length.
prefix: String,
}

/// Parser that reads a 32-bits or 64-bits
fn uint(
endianness: Endianness,
Expand All @@ -1409,21 +1443,33 @@ fn uint(
}
}

/// Parser that reads ULEB128.
/// https://en.wikipedia.org/wiki/LEB128
/// Parser that reads [ULEB128][1].
///
/// Notice, however, that this function returns a `u64` and is able to parse
/// numbers up to 72057594037927935. When parsing larger numbers it fails,
/// even if they are valid ULEB128.
///
/// [1]: https://en.wikipedia.org/wiki/LEB128
fn uleb128(input: &[u8]) -> IResult<&[u8], u64> {
let mut val: u64 = 0;
let mut shift: u64 = 0;
let mut shift: u32 = 0;

let mut data = input;
let mut byte: u8;

loop {
// Read one byte of data.
(data, byte) = u8(data)?;

val |= ((byte & !(1 << 7)) as u64) << shift;
// Use all the bits, except the most significant one.
let b = (byte & 0x7f) as u64;

val |= b
.checked_shl(shift)
.ok_or(Err::Error(Error::new(input, ErrorKind::TooLarge)))?;

if byte & (1 << 7) == 0 {
// Break if the most significant bit is zero.
if byte & 0x80 == 0 {
break;
}

Expand Down Expand Up @@ -1798,31 +1844,43 @@ impl From<&MinVersion> for protos::macho::MinVersion {

#[test]
fn test_uleb_parsing() {
let uleb_128_in = vec![0b1000_0001, 0b000_0001];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(129, result);
let (_, n) = uleb128(&[0b1000_0001, 0b000_0001]).unwrap();
assert_eq!(129, n);

let (_, n) = uleb128(&[0b1000_0000, 0b0000_0001]).unwrap();
assert_eq!(128, n);

let (_, n) = uleb128(&[0b111_1111]).unwrap();
assert_eq!(127, n);

let (_, n) = uleb128(&[0b111_1110]).unwrap();
assert_eq!(126, n);

let (_, n) = uleb128(&[0b000_0000]).unwrap();
assert_eq!(0, n);

let (_, n) = uleb128(&[0b1010_0000, 0b0000_0001]).unwrap();
assert_eq!(160, n);

let uleb_128_in = vec![0b1000_0000, 0b0000_0001];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(128, result);
let (_, n) = uleb128(&[0b1001_0110, 0b0000_0101]).unwrap();
assert_eq!(662, n);

let uleb_128_in = vec![0b111_1111];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(127, result);
let (_, n) = uleb128(&[0b1110_0101, 0b1000_1110, 0b0010_0110]).unwrap();
assert_eq!(624485, n);

let uleb_128_in = vec![0b111_1110];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(126, result);
let (_, n) = uleb128(&[0x80, 0x80, 0x80, 0x00]).unwrap();
assert_eq!(0, n);

let uleb_128_in = vec![0b000_0000];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(0, result);
let (_, n) =
uleb128(&[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00]).unwrap();
assert_eq!(0, n);

let uleb_128_in = vec![0b1010_0000, 0b0000_0001];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(160, result);
let (_, n) =
uleb128(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f]).unwrap();
assert_eq!(72057594037927935, n);

let uleb_128_in = vec![0b10010110, 0b00000101];
let (_remainder, result) = uleb128(&uleb_128_in).unwrap();
assert_eq!(662, result);
assert!(uleb128(&[
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,
])
.is_err());
}
Original file line number Diff line number Diff line change
Expand Up @@ -456,39 +456,39 @@ min_version:
version: "10.13.0"
sdk: "10.13.0"
exports:
- "__Z18AllocateStringCopyRPcRK8wxString"
- "__Z18ACCT_GetFirstIndexPK15CAPF_DataSourceRl"
- "__Z19CBB_GetResmanEventsP15CAPF_DataSourcePFvP20HarmonyResourceEventPvES3_P15HarmonyNBTicket"
- "__Z19CBB_SetEFControlBarP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z19CB_ImportControlBarP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z19ACCT_FreeStringListPP17HarmonyAttributesi"
- "__Z19ACCT_ClearJobLogXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z19ACCT_GetEventsExXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z16CreateControlBarP8stCBInfoP20HarmonySessionHandle"
- "__Z16ACCT_ClearJobLogPK15CAPF_DataSource"
- "__Z16out_HarmonyAttrsRNSt3__113basic_ostreamIcNS_11char_traitsIcEEEEiP17HarmonyAttributes"
- "__Z14ACCT_GetJobLogPK15CAPF_DataSourcePPP17HarmonyAttributesRl"
- "__Z10InitializeP15CAPF_PluginInfoP14PLUGINMEMBLOCK"
- "__Z17ACCT_GetJobLogXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z21CBB_GetControlBarDataP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z21ACCT_GetJobLogPortionPK15CAPF_DataSourceRlS2_PPP17HarmonyAttributesS2_"
- "__Z21ACCT_GetFirstIndexXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z21APF_ds2HarmonyHandlesPK15CAPF_DataSourcePP29harmonyDataSourceSpecificDataPP19HarmonyServerHandle"
- "__Z21APF_harmonyGetSessionP29harmonyDataSourceSpecificData22APF_harmonySessionType"
- "__Z20CBB_DeleteControlBarP15CAPF_DataSourceRK8wxString"
- "__Z20ACCT_GetJobLogLengthPK15CAPF_DataSourceRl"
- "__Z22CBB_FreeControlBarDataP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z22ACCT_FreeStringListXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z24CBB_GetControlbarSupportP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z24ACCT_GetJobLogPortionXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z23ACCT_GetJobLogLengthXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z23APF_ChkTicketOrTimedOutP15CAPF_DataSourceR15HarmonyNBTicketR13HarmonyResult"
- "__Z23GetFeatureSessionHandlePK15CAPF_DataSource22APF_harmonySessionTypePP20HarmonySessionHandle"
- "__Z25APF_H_GetLocalizedStringsP29harmonyDataSourceSpecificDataPPciS2_i"
- "__Z26APF_H_GetLocalizedStringNBP15CAPF_DataSourceR8wxStringS2_iS2_"
- "__Z36APF_H_GetLocalizedStringNB_AlternateP15CAPF_DataSourceR8wxStringS2_iS2_"
- "__ZlsRNSt3__113basic_ostreamIcNS_11char_traitsIcEEEEP17HarmonyAttributes"
- "_APF_Plugin_Initialize"
- "_APF_Plugin_DisconnectingDataSource"
- "_APF_Plugin_Unload"
- "_APF_Plugin_Terminate"
- "_APF_Plugin_Unload"
- "_APF_Plugin_DisconnectingDataSource"
- "_APF_Plugin_Initialize"
- "__ZlsRNSt3__113basic_ostreamIcNS_11char_traitsIcEEEEP17HarmonyAttributes"
- "__Z36APF_H_GetLocalizedStringNB_AlternateP15CAPF_DataSourceR8wxStringS2_iS2_"
- "__Z26APF_H_GetLocalizedStringNBP15CAPF_DataSourceR8wxStringS2_iS2_"
- "__Z25APF_H_GetLocalizedStringsP29harmonyDataSourceSpecificDataPPciS2_i"
- "__Z23GetFeatureSessionHandlePK15CAPF_DataSource22APF_harmonySessionTypePP20HarmonySessionHandle"
- "__Z23APF_ChkTicketOrTimedOutP15CAPF_DataSourceR15HarmonyNBTicketR13HarmonyResult"
- "__Z23ACCT_GetJobLogLengthXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z24ACCT_GetJobLogPortionXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z24CBB_GetControlbarSupportP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z22ACCT_FreeStringListXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z22CBB_FreeControlBarDataP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z20ACCT_GetJobLogLengthPK15CAPF_DataSourceRl"
- "__Z20CBB_DeleteControlBarP15CAPF_DataSourceRK8wxString"
- "__Z21APF_harmonyGetSessionP29harmonyDataSourceSpecificData22APF_harmonySessionType"
- "__Z21APF_ds2HarmonyHandlesPK15CAPF_DataSourcePP29harmonyDataSourceSpecificDataPP19HarmonyServerHandle"
- "__Z21ACCT_GetFirstIndexXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z21ACCT_GetJobLogPortionPK15CAPF_DataSourceRlS2_PPP17HarmonyAttributesS2_"
- "__Z21CBB_GetControlBarDataP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z17ACCT_GetJobLogXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z10InitializeP15CAPF_PluginInfoP14PLUGINMEMBLOCK"
- "__Z14ACCT_GetJobLogPK15CAPF_DataSourcePPP17HarmonyAttributesRl"
- "__Z16out_HarmonyAttrsRNSt3__113basic_ostreamIcNS_11char_traitsIcEEEEiP17HarmonyAttributes"
- "__Z16ACCT_ClearJobLogPK15CAPF_DataSource"
- "__Z16CreateControlBarP8stCBInfoP20HarmonySessionHandle"
- "__Z19ACCT_GetEventsExXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z19ACCT_ClearJobLogXMLP15CAPF_PluginInfoP14PLUGINMEMBLOCKP9VERSIONEXPv"
- "__Z19ACCT_FreeStringListPP17HarmonyAttributesi"
- "__Z19CB_ImportControlBarP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z19CBB_SetEFControlBarP15CAPF_DataSourceP15CAPF_PluginInfoPv"
- "__Z19CBB_GetResmanEventsP15CAPF_DataSourcePFvP20HarmonyResourceEventPvES3_P15HarmonyNBTicket"
- "__Z18ACCT_GetFirstIndexPK15CAPF_DataSourceRl"
- "__Z18AllocateStringCopyRPcRK8wxString"
8 changes: 4 additions & 4 deletions lib/src/modules/macho/tests/testdata/tiny_universal.out
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,9 @@ file:
version: "10.9.0"
sdk: "10.10.0"
exports:
- "__mh_execute_header"
- "_factorial"
- "_main"
- "_factorial"
- "__mh_execute_header"
- magic: 0xcffaedfe
cputype: 0x1000007
cpusubtype: 0x80000003
Expand Down Expand Up @@ -395,6 +395,6 @@ file:
version: "10.9.0"
sdk: "10.10.0"
exports:
- "__mh_execute_header"
- "_main"
- "_factorial"
- "_main"
- "__mh_execute_header"
Loading