Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement mach-o export trie parsing and export hashing function #132

Merged
merged 8 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions lib/src/modules/add_modules.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
// File generated automatically by build.rs. Do not edit.
{
#[cfg(feature = "string-module")]
add_module!(modules, "string", string, "string.String", Some("string"), Some(string::__main__ as MainFn));
#[cfg(feature = "test_proto2-module")]
add_module!(modules, "test_proto2", test_proto2, "test_proto2.TestProto2", Some("test_proto2"), Some(test_proto2::__main__ as MainFn));
#[cfg(feature = "macho-module")]
add_module!(modules, "macho", macho, "macho.Macho", Some("macho"), Some(macho::__main__ as MainFn));
#[cfg(feature = "pe-module")]
add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn));
#[cfg(feature = "elf-module")]
add_module!(modules, "elf", elf, "elf.ELF", Some("elf"), Some(elf::__main__ as MainFn));
#[cfg(feature = "text-module")]
add_module!(modules, "text", text, "text.Text", Some("text"), Some(text::__main__ as MainFn));
#[cfg(feature = "dotnet-module")]
add_module!(modules, "dotnet", dotnet, "dotnet.Dotnet", Some("dotnet"), Some(dotnet::__main__ as MainFn));
#[cfg(feature = "lnk-module")]
add_module!(modules, "lnk", lnk, "lnk.Lnk", Some("lnk"), Some(lnk::__main__ as MainFn));
#[cfg(feature = "hash-module")]
add_module!(modules, "hash", hash, "hash.Hash", Some("hash"), Some(hash::__main__ as MainFn));
#[cfg(feature = "magic-module")]
add_module!(modules, "magic", magic, "magic.Magic", Some("magic"), Some(magic::__main__ as MainFn));
#[cfg(feature = "math-module")]
add_module!(modules, "math", math, "math.Math", Some("math"), Some(math::__main__ as MainFn));
#[cfg(feature = "test_proto2-module")]
add_module!(modules, "test_proto2", test_proto2, "test_proto2.TestProto2", Some("test_proto2"), Some(test_proto2::__main__ as MainFn));
#[cfg(feature = "text-module")]
add_module!(modules, "text", text, "text.Text", Some("text"), Some(text::__main__ as MainFn));
#[cfg(feature = "time-module")]
add_module!(modules, "time", time, "time.Time", Some("time"), Some(time::__main__ as MainFn));
#[cfg(feature = "dotnet-module")]
add_module!(modules, "dotnet", dotnet, "dotnet.Dotnet", Some("dotnet"), Some(dotnet::__main__ as MainFn));
#[cfg(feature = "test_proto3-module")]
add_module!(modules, "test_proto3", test_proto3, "test_proto3.TestProto3", Some("test_proto3"), Some(test_proto3::__main__ as MainFn));
#[cfg(feature = "pe-module")]
add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn));
#[cfg(feature = "string-module")]
add_module!(modules, "string", string, "string.String", Some("string"), Some(string::__main__ as MainFn));
#[cfg(feature = "console-module")]
add_module!(modules, "console", console, "console.Console", Some("console"), Some(console::__main__ as MainFn));
#[cfg(feature = "elf-module")]
add_module!(modules, "elf", elf, "elf.ELF", Some("elf"), Some(elf::__main__ as MainFn));
#[cfg(feature = "math-module")]
add_module!(modules, "math", math, "math.Math", Some("math"), Some(math::__main__ as MainFn));
}
35 changes: 33 additions & 2 deletions lib/src/modules/macho/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,13 @@ fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
let mut md5_hash = Md5::new();
let mut entitlements_to_hash = &macho.entitlements;

// if there are not any entitlements in the main Macho, the dylibs of the
// if there are not any entitlements in the main Macho, the entitlements of the
// nested file should be hashed
if entitlements_to_hash.is_empty() && !macho.file.is_empty() {
entitlements_to_hash = &macho.file[0].entitlements;
}

// we need to check again as the nested file dylibs could be empty too
// we need to check again as the nested file entitlements could be empty too
if entitlements_to_hash.is_empty() {
return None;
}
Expand All @@ -335,6 +335,37 @@ fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
Some(RuntimeString::new(digest))
}

/// Returns an md5 hash of the export symbols in the mach-o binary
///
/// The symbols are trimmed, lowercased, de-duplicated, sorted and joined
/// with commas before hashing. The result is undefined (`None`) when there
/// is no module output or no export symbols to hash.
#[module_export]
fn export_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
    let macho = ctx.module_output::<Macho>()?;

    // Prefer the exports of the main Mach-O; when it has none, fall back to
    // the exports of the first nested file (if there is one).
    let symbols = if macho.exports.is_empty() && !macho.file.is_empty() {
        &macho.file[0].exports
    } else {
        &macho.exports
    };

    // The selected list (main or nested) can still be empty.
    if symbols.is_empty() {
        return None;
    }

    let normalized: String = symbols
        .iter()
        .map(|symbol| symbol.trim().to_lowercase())
        .unique()
        .sorted()
        .join(",");

    let mut hasher = Md5::new();
    hasher.update(normalized.as_bytes());

    Some(RuntimeString::new(format!("{:x}", hasher.finalize())))
}

#[module_main]
fn main(input: &[u8]) -> Macho {
match parser::MachO::parse(input) {
Expand Down
166 changes: 163 additions & 3 deletions lib/src/modules/macho/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use bstr::{BStr, ByteSlice};
use itertools::Itertools;
#[cfg(feature = "logging")]
use log::error;
use nom::bytes::complete::take;
use nom::bytes::complete::{tag, take, take_till};
use nom::combinator::{cond, map, verify};
use nom::error::ErrorKind;
use nom::multi::{count, length_count};
use nom::number::complete::{be_u32, le_u32, u16, u32, u64};
use nom::number::complete::{be_u32, le_u32, u16, u32, u64, u8};
use nom::number::Endianness;
use nom::sequence::tuple;
use nom::{Err, IResult, Parser};
Expand Down Expand Up @@ -36,6 +36,11 @@ const _CS_MAGIC_DETACHED_SIGNATURE: u32 = 0xfade0cc1;
const _CS_MAGIC_BLOBWRAPPER: u32 = 0xfade0b01;
const CS_MAGIC_EMBEDDED_ENTITLEMENTS: u32 = 0xfade7171;

/// Mach-O export flag constants
const EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION: u64 = 0x00000004;
const EXPORT_SYMBOL_FLAGS_REEXPORT: u64 = 0x00000008;
const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: u64 = 0x00000010;

/// Mach-O dynamic linker constant
const LC_REQ_DYLD: u32 = 0x80000000;

Expand Down Expand Up @@ -270,6 +275,7 @@ impl<'a> MachO<'a> {
uuid: None,
build_version: None,
min_version: None,
exports: Vec::new(),
};

for _ in 0..macho.header.ncmds as usize {
Expand Down Expand Up @@ -327,6 +333,22 @@ impl<'a> MachO<'a> {
// everything else
};
}

if let Some(ref dyld_info) = macho.dyld_info {
    let offset = dyld_info.export_off as usize;
    let size = dyld_info.export_size as usize;
    // Both the offset and the size are read from the file, so the range
    // must be validated: `checked_add` guards against overflow and `get`
    // yields `None` instead of panicking when the range runs past the end
    // of the data.
    let export_data = offset
        .checked_add(size)
        .filter(|_| offset < data.len())
        .and_then(|end| data.get(offset..end));

    if let Some(export_data) = export_data {
        if let Err(_err) = macho.exports()(export_data) {
            #[cfg(feature = "logging")]
            error!("Error parsing Mach-O file: {:?}", _err);
            // fail silently if it fails, data was not formatted
            // correctly but parsing should still proceed for
            // everything else
        };
    }
}

Ok(macho)
}
}
Expand All @@ -352,6 +374,7 @@ pub struct MachOFile<'a> {
certificates: Option<Certificates>,
build_version: Option<BuildVersionCommand>,
min_version: Option<MinVersion>,
exports: Vec<String>,
}

impl<'a> MachOFile<'a> {
Expand Down Expand Up @@ -888,6 +911,87 @@ impl<'a> MachOFile<'a> {
)
}

fn parse_export_node(
&mut self,
) -> impl FnMut(&'a [u8], u64, &BStr) -> IResult<&'a [u8], String> + '_
{
move |data: &'a [u8], offset: u64, prefix: &BStr| {
let (remainder, length) = uleb128()(&data[offset as usize..])?;
let mut remaining_data = remainder;

if length != 0 {
let (remainder, flags) = uleb128()(remaining_data)?;
match flags {
EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => {
let (remainder, _stub_offset) = uleb128()(remainder)?;

let (remainder, _resolver_offset) =
uleb128()(remainder)?;
remaining_data = remainder;
}
EXPORT_SYMBOL_FLAGS_REEXPORT => {
let (remainder, _ordinal) = uleb128()(remainder)?;

let (remainder, _label) = map(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| s,
)(
remainder
)?;

remaining_data = remainder;
}
EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION => {
let (remainder, _offset) = uleb128()(remainder)?;
remaining_data = remainder;
}
_ => {}
}
}

let (remainder, edges) = u8(remaining_data)?;
let mut edge_remainder = remainder;

for _ in 0..edges {
let (remainder, strr) = map(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| s,
)(edge_remainder)?;
let edge_label = BStr::new(strr);
let (remainder, edge_offset) = uleb128()(remainder)?;
let (_, _) = self.parse_export_node()(
data,
edge_offset,
BStr::new(&bstr::concat([prefix, edge_label])),
)?;
edge_remainder = remainder;
}

if length != 0 {
if let Ok(prefix) = prefix.to_str() {
self.exports.push(prefix.to_string())
}
}

Ok((data, prefix.to_str().unwrap().to_string()))
}
}

/// Parser that parses the exports at the offsets defined within LC_DYLD_INFO and LC_DYLD_INFO_ONLY
///
/// The export symbols found while walking the trie are accumulated in
/// `self.exports` by `parse_export_node`; the vector returned by this
/// parser is always empty.
///
/// # Errors
///
/// Any error produced while parsing the trie is propagated to the caller,
/// which decides how to handle malformed export data.
fn exports(
    &mut self,
) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<String>> + '_ {
    move |data: &'a [u8]| {
        // Start at the root node (offset 0) with an empty symbol prefix.
        let (remainder, _) =
            self.parse_export_node()(data, 0, BStr::new(""))?;
        Ok((remainder, Vec::new()))
    }
}

/// Parser that parses a LC_ID_DYLINKER, LC_LOAD_DYLINKER or
/// LC_DYLD_ENVIRONMENT command.
fn dylinker_command(
Expand Down Expand Up @@ -1302,6 +1406,30 @@ fn uint(
}
}

/// Parser that reads ULEB128
fn uleb128() -> impl FnMut(&[u8]) -> IResult<&[u8], u64> {
move |input: &[u8]| {
let mut val: u64 = 0;
let mut shift: u64 = 0;

let mut data = input;
let mut byte: u8;

loop {
(data, byte) = u8(data)?;

val |= ((byte & !(1 << 7)) as u64) << shift;

if byte & (1 << 7) == 0 {
break;
}
shift += 7;
}

Ok((data, val))
}
}

/// Convert a decimal number representation to a version string representation.
fn convert_to_version_string(decimal_number: u32) -> String {
let major = decimal_number >> 16;
Expand Down Expand Up @@ -1404,7 +1532,7 @@ impl From<MachO<'_>> for protos::macho::Macho {
.rpaths
.extend(m.rpaths.iter().map(|rpath: &&[u8]| rpath.to_vec()));
result.entitlements.extend(m.entitlements.clone());

result.exports.extend(m.exports.clone());
result
.set_number_of_segments(m.segments.len().try_into().unwrap());
} else {
Expand Down Expand Up @@ -1483,6 +1611,7 @@ impl From<&MachOFile<'_>> for protos::macho::File {
result.dylibs.extend(macho.dylibs.iter().map(|dylib| dylib.into()));
result.rpaths.extend(macho.rpaths.iter().map(|rpath| rpath.to_vec()));
result.entitlements.extend(macho.entitlements.clone());
result.exports.extend(macho.exports.clone());

result
.set_number_of_segments(result.segments.len().try_into().unwrap());
Expand Down Expand Up @@ -1663,3 +1792,34 @@ impl From<&MinVersion> for protos::macho::MinVersion {
result
}
}

/// Checks that `uleb128` decodes a set of known encodings.
#[test]
fn test_uleb_parsing() {
    // Pairs of (encoded bytes, expected decoded value).
    let cases: [(&[u8], u64); 7] = [
        (&[0b1000_0001, 0b000_0001], 129),
        (&[0b1000_0000, 0b0000_0001], 128),
        (&[0b111_1111], 127),
        (&[0b111_1110], 126),
        (&[0b000_0000], 0),
        (&[0b1010_0000, 0b0000_0001], 160),
        (&[0b10010110, 0b00000101], 662),
    ];

    for (encoded, expected) in cases {
        // A parse failure is a test failure, not an unimplemented feature,
        // so report it with a descriptive message.
        let Ok((_remainder, decoded)) = uleb128()(encoded) else {
            panic!("failed to decode ULEB128 input {encoded:?}");
        };
        assert_eq!(expected, decoded, "decoding {encoded:?}");
    }
}
35 changes: 33 additions & 2 deletions lib/src/modules/macho/tests/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use pretty_assertions::assert_eq;

use crate::modules::tests::create_binary_from_zipped_ihex;
use crate::tests::rule_false;
use crate::tests::rule_true;
Expand Down Expand Up @@ -366,4 +364,37 @@ fn test_macho_module() {
"#,
&[]
);

rule_true!(
r#"
import "macho"
rule macho_test {
condition:
macho.export_hash() == "7f3b75c82e3151fff6c0a55b51cd5b94"
}
"#,
&chess_macho_data
);

rule_true!(
r#"
import "macho"
rule macho_test {
condition:
not defined macho.export_hash()
}
"#,
&[]
);

rule_true!(
r#"
import "macho"
rule macho_test {
condition:
macho.export_hash() == "6bfc6e935c71039e6e6abf097830dceb"
}
"#,
&tiny_universal_macho_data
);
}
Loading
Loading