diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 38b5527d4..5035a0a8c 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -7,12 +7,8 @@ name: Python extension on: push: - branches: - - "*" - tags: - - "*" - pull_request: - workflow_dispatch: + paths: + - 'yara-x-py/**' permissions: contents: read diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index eb87a3f74..5bffa2e2e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,7 +1,7 @@ name: Tests on: pull_request: - types: [ opened, reopened, synchronized, ready_for_review ] + types: [ opened, reopened, synchronize, ready_for_review ] push: jobs: test: diff --git a/rustfmt.toml b/rustfmt.toml index cbff8e2d9..c424d7dfa 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,4 @@ max_width = 79 -use_small_heuristics = "max" +use_small_heuristics = "Max" comment_width = 79 wrap_comments = true diff --git a/yara-x/src/modules/macho/mod.rs b/yara-x/src/modules/macho/mod.rs index d8c9d7b2b..a55968621 100644 --- a/yara-x/src/modules/macho/mod.rs +++ b/yara-x/src/modules/macho/mod.rs @@ -7,7 +7,14 @@ use arrayref::array_ref; use byteorder::{BigEndian, ByteOrder}; -use nom::{bytes::complete::take, multi::count, number::complete::*, IResult}; +use nom::{ + bytes::complete::{tag, take, take_till}, + combinator::map_res, + multi::count, + number::complete::*, + sequence::tuple, + IResult, +}; use thiserror::Error; use crate::modules::prelude::*; @@ -50,7 +57,11 @@ const CPU_TYPE_POWERPC64: u32 = 0x01000012; /// Define Mach-O load commands const LC_SEGMENT: u32 = 0x00000001; const LC_UNIXTHREAD: u32 = 0x00000005; +const LC_LOAD_DYLIB: u32 = 0x0000000c; +const LC_ID_DYLIB: u32 = 0x0000000d; +const LC_LOAD_WEAK_DYLIB: u32 = 0x80000018; const LC_SEGMENT_64: u32 = 0x00000019; +const LC_REEXPORT_DYLIB: u32 = 0x8000001f; const LC_MAIN: u32 = 0x80000028; /// Enum that provides strongly-typed error system used in code @@ 
-153,6 +164,27 @@ struct LoadCommand {
     cmdsize: u32,
 }
 
+/// `DylibObject`: Represents a dylib struct in the Mach-O file.
+/// Fields: name (raw bytes), timestamp, current_version, compatibility_version
+#[repr(C)]
+#[derive(Debug, Default, Clone)]
+struct DylibObject {
+    name: Vec<u8>,
+    timestamp: u32,
+    current_version: u32,
+    compatibility_version: u32,
+}
+
+/// `DylibCommand`: Represents a dylib command in the Mach-O file.
+/// Fields: cmd, cmdsize, dylib
+#[repr(C)]
+#[derive(Debug, Default, Clone)]
+struct DylibCommand {
+    cmd: u32,
+    cmdsize: u32,
+    dylib: DylibObject,
+}
+
 /// `SegmentCommand32`: Represents a 32-bit segment command in the Mach-O file.
 /// Fields: cmd, cmdsize, segname, vmaddr, vmsize, fileoff, filesize, maxprot,
 /// initprot, nsects, flags
@@ -609,6 +641,31 @@ fn swap_load_command(command: &mut LoadCommand) {
     command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
 }
 
+/// Swaps the endianness of the numeric fields of a Mach-O dylib from
+/// BigEndian to LittleEndian in-place. The `name` bytes are left untouched.
+///
+/// # Arguments
+///
+/// * `dylib`: A mutable reference to the Mach-O dylib.
+fn swap_dylib(dylib: &mut DylibObject) {
+    dylib.timestamp = BigEndian::read_u32(&dylib.timestamp.to_le_bytes());
+    dylib.compatibility_version =
+        BigEndian::read_u32(&dylib.compatibility_version.to_le_bytes());
+    dylib.current_version =
+        BigEndian::read_u32(&dylib.current_version.to_le_bytes());
+}
+
+/// Swaps the endianness of `cmd` and `cmdsize` of a Mach-O dylib command
+/// from BigEndian to LittleEndian in-place. The nested `dylib` is not touched.
+///
+/// # Arguments
+///
+/// * `command`: A mutable reference to the Mach-O dylib command.
+fn swap_dylib_command(command: &mut DylibCommand) {
+    command.cmd = BigEndian::read_u32(&command.cmd.to_le_bytes());
+    command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
+}
+
 /// Swaps the endianness of fields within a 32-bit Mach-O segment command from
 /// BigEndian to LittleEndian in-place.
///
@@ -839,6 +896,67 @@ fn parse_load_command(input: &[u8]) -> IResult<&[u8], LoadCommand> {
     Ok((input, LoadCommand { cmd, cmdsize }))
 }
 
+/// Parse a Mach-O dylib object: a name offset, three `u32` fields and a
+/// NUL-terminated name. All integers are read as little-endian.
+///
+/// # Arguments
+///
+/// * `input`: A slice of bytes containing the raw dylib object data.
+///
+/// # Returns
+///
+/// A `nom` IResult containing the remaining unparsed input and the parsed
+/// dylib structure, or a `nom` error if the parsing fails.
+///
+/// # Errors
+///
+/// Fails on short input, a missing NUL terminator, or a non-UTF-8 name.
+fn parse_dylib(input: &[u8]) -> IResult<&[u8], DylibObject> {
+    // Offset of the name: unused, the name is read right after the fields.
+    let (input, _) = le_u32(input)?;
+    let (input, timestamp) = le_u32(input)?;
+    let (input, current_version) = le_u32(input)?;
+    let (input, compatibility_version) = le_u32(input)?;
+
+    let (input, name) = map_res(
+        tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
+        |(s, _)| std::str::from_utf8(s),
+    )(input)?;
+
+    Ok((
+        input,
+        DylibObject {
+            name: name.into(),
+            timestamp,
+            compatibility_version,
+            current_version,
+        },
+    ))
+}
+
+/// Parse a Mach-O DylibCommand: the little-endian `cmd` and `cmdsize` words
+/// followed by the dylib object parsed by `parse_dylib`.
+///
+/// # Arguments
+///
+/// * `input`: A slice of bytes containing the raw DylibCommand data.
+///
+/// # Returns
+///
+/// A `nom` IResult containing the remaining unparsed input and the parsed
+/// DylibCommand structure, or a `nom` error if the parsing fails.
+///
+/// # Errors
+///
+/// Returns a `nom` error if the input data is insufficient or malformed.
+fn parse_dylib_command(input: &[u8]) -> IResult<&[u8], DylibCommand> {
+    let (input, cmd) = le_u32(input)?;
+    let (input, cmdsize) = le_u32(input)?;
+    let (input, dylib) = parse_dylib(input)?;
+
+    Ok((input, DylibCommand { cmd, cmdsize, dylib }))
+}
+
 /// Parse the 32-bit segment command of a Mach-O file, offering a structured
 /// view of its content.
///
@@ -1385,6 +1503,67 @@ fn parse_ppc_thread_state64(input: &[u8]) -> IResult<&[u8], PPCThreadState64> {
     Ok((input, PPCThreadState64 { srr0, srr1, r, cr, xer, lr, ctr, vrsave }))
 }
 
+/// Handles the LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB, and
+/// LC_REEXPORT_DYLIB commands for Mach-O files, parsing the data
+/// and populating a protobuf representation of the dylib.
+///
+/// # Arguments
+///
+/// * `command_data`: The raw byte data of the dylib command.
+/// * `size`: The size of the dylib command data.
+/// * `macho_file`: Mutable reference to the protobuf representation of the
+///   Mach-O file.
+///
+/// # Returns
+///
+/// `Ok(())` once the parsed dylib has been pushed to `macho_file.dylibs`.
+///
+/// # Errors
+///
+/// * `MachoError::FileSectionTooSmall`: `size` is below the 24-byte fixed
+///   part of a dylib command (six `u32` words).
+/// * `MachoError::ParsingError`: the dylib command data cannot be parsed.
+/// * `MachoError::MissingHeaderValue`: the "magic" header value, needed to
+///   decide whether bytes must be swapped, is missing.
+fn handle_dylib_command(
+    command_data: &[u8],
+    size: usize,
+    macho_file: &mut File,
+) -> Result<(), MachoError> {
+    // On-disk size; `size_of::<DylibCommand>()` would include the `Vec`.
+    const DYLIB_COMMAND_SIZE: usize = 6 * std::mem::size_of::<u32>();
+    if size < DYLIB_COMMAND_SIZE {
+        return Err(MachoError::FileSectionTooSmall(
+            "DylibCommand".to_string(),
+        ));
+    }
+
+    let (_, mut dy) = parse_dylib_command(command_data)
+        .map_err(|e| MachoError::ParsingError(format!("{:?}", e)))?;
+    if should_swap_bytes(
+        macho_file
+            .magic
+            .ok_or(MachoError::MissingHeaderValue("magic".to_string()))?,
+    ) {
+        swap_dylib_command(&mut dy);
+        swap_dylib(&mut dy.dylib);
+    }
+
+    let dylib = Dylib {
+        name: Some(
+            std::str::from_utf8(&dy.dylib.name)
+                .unwrap_or_default()
+                .to_string(),
+        ),
+        timestamp: Some(dy.dylib.timestamp),
+        compatibility_version: Some(dy.dylib.compatibility_version),
+        current_version: Some(dy.dylib.current_version),
+        ..Default::default()
+    };
+    macho_file.dylibs.push(dylib);
+    Ok(())
+}
+
 /// Handles the LC_SEGMENT command for 32-bit Mach-O files, parsing the data
 /// and populating a protobuf representation of the segment and its associated
 /// file sections.
@@ -1912,6 +2091,10 @@ fn handle_command(
             LC_MAIN => {
                 handle_main(command_data, cmdsize, macho_file)?;
             }
+            LC_LOAD_DYLIB | LC_ID_DYLIB | LC_LOAD_WEAK_DYLIB
+            | LC_REEXPORT_DYLIB => {
+                handle_dylib_command(command_data, cmdsize, macho_file)?;
+            }
             _ => {}
         }
     }
@@ -2421,6 +2604,7 @@ fn main(ctx: &ScanContext) -> Macho {
                 macho_proto.reserved = file_data.reserved;
                 macho_proto.number_of_segments = file_data.number_of_segments;
                 macho_proto.segments = file_data.segments;
+                macho_proto.dylibs = file_data.dylibs;
                 macho_proto.entry_point = file_data.entry_point;
                 macho_proto.stack_size = file_data.stack_size;
             }
diff --git a/yara-x/src/modules/macho/tests/mod.rs b/yara-x/src/modules/macho/tests/mod.rs
index 48d327be2..d1fa13dfd 100644
--- a/yara-x/src/modules/macho/tests/mod.rs
+++ b/yara-x/src/modules/macho/tests/mod.rs
@@ -224,6 +224,38 @@ fn test_swap_load_command() {
     assert_eq!(command.cmdsize, 0x88776655);
 }
 
+// `swap_dylib` must reverse the byte order of all three numeric fields.
+#[test]
+fn test_swap_dylib() {
+    let mut command = DylibObject {
+        timestamp: 0x11223344,
+        compatibility_version: 0x55667788,
+        current_version: 0x99AABBCC,
+        ..Default::default()
+    };
+
+    swap_dylib(&mut command);
+
+    assert_eq!(command.timestamp, 0x44332211);
+    assert_eq!(command.compatibility_version, 0x88776655);
+    assert_eq!(command.current_version, 0xCCBBAA99);
+}
+
+// `swap_dylib_command` must reverse the byte order of `cmd` and `cmdsize`.
+#[test]
+fn test_swap_dylib_command() {
+    let mut command = DylibCommand {
+        cmd: 0x11223344,
+        cmdsize: 0x55667788,
+        ..Default::default()
+    };
+
+    swap_dylib_command(&mut command);
+
+    assert_eq!(command.cmd, 0x44332211);
+    assert_eq!(command.cmdsize, 0x88776655);
+}
+
 #[test]
 fn test_swap_segment_command() {
     let mut segment = SegmentCommand32 {
diff --git a/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out b/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out
index 37265e749..1798e72ce 100644
--- a/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out
+++ b/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out
@@ -227,4 +227,10 @@ segments {
   nsects: 0
   flags: 4
 }
+dylibs { + name: "/usr/lib/libSystem.B.dylib" + timestamp: 1111112572 + compatibility_version: 65536 + current_version: 4653313 +} entry_point: 3768 diff --git a/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out b/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out index 6ad8d7593..e7d9595c8 100644 --- a/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out +++ b/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out @@ -75,3 +75,15 @@ segments { nsects: 0 flags: 0 } +dylibs { + name: "fact_x86_64.dylib" + timestamp: 1 + compatibility_version: 0 + current_version: 0 +} +dylibs { + name: "/usr/lib/libSystem.B.dylib" + timestamp: 2 + compatibility_version: 65536 + current_version: 79495168 +} diff --git a/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out b/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out index bfa09b461..e8ebc004b 100644 --- a/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out +++ b/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out @@ -149,5 +149,11 @@ segments { nsects: 0 flags: 0 } +dylibs { + name: "/usr/lib/libSystem.B.dylib" + timestamp: 2 + compatibility_version: 65536 + current_version: 79495168 +} entry_point: 3728 stack_size: 0 diff --git a/yara-x/src/modules/macho/tests/testdata/tiny_universal.out b/yara-x/src/modules/macho/tests/testdata/tiny_universal.out index 95e97770f..34befd9fc 100644 --- a/yara-x/src/modules/macho/tests/testdata/tiny_universal.out +++ b/yara-x/src/modules/macho/tests/testdata/tiny_universal.out @@ -166,6 +166,12 @@ file { nsects: 0 flags: 0 } + dylibs { + name: "/usr/lib/libSystem.B.dylib" + timestamp: 2 + compatibility_version: 65536 + current_version: 79495168 + } entry_point: 3808 stack_size: 0 } @@ -343,6 +349,12 @@ file { nsects: 0 flags: 0 } + dylibs { + name: "/usr/lib/libSystem.B.dylib" + timestamp: 2 + compatibility_version: 65536 + current_version: 79495168 + } entry_point: 3808 stack_size: 0 } diff --git 
a/yara-x/src/modules/modules.rs b/yara-x/src/modules/modules.rs
index 0e272571a..95feba1a6 100644
--- a/yara-x/src/modules/modules.rs
+++ b/yara-x/src/modules/modules.rs
@@ -1,17 +1,17 @@
 // File generated automatically by build.rs. Do not edit.
-#[cfg(feature = "string-module")]
-pub mod string;
-#[cfg(feature = "macho-module")]
-pub mod macho;
 #[cfg(feature = "text-module")]
 pub mod text;
-#[cfg(feature = "hash-module")]
-pub mod hash;
 #[cfg(feature = "test_proto2-module")]
 pub mod test_proto2;
 #[cfg(feature = "lnk-module")]
 pub mod lnk;
+#[cfg(feature = "hash-module")]
+pub mod hash;
+#[cfg(feature = "test_proto3-module")]
+pub mod test_proto3;
+#[cfg(feature = "macho-module")]
+pub mod macho;
 #[cfg(feature = "time-module")]
 pub mod time;
-#[cfg(feature = "test_proto3-module")]
-pub mod test_proto3;
\ No newline at end of file
+#[cfg(feature = "string-module")]
+pub mod string;
\ No newline at end of file
diff --git a/yara-x/src/modules/protos/macho.proto b/yara-x/src/modules/protos/macho.proto
index bd8b3ca07..e75f3f17b 100644
--- a/yara-x/src/modules/protos/macho.proto
+++ b/yara-x/src/modules/protos/macho.proto
@@ -8,6 +8,15 @@ option (yara.module_options) = {
   rust_module: "macho"
 };
 
+// Dynamic library described by an LC_LOAD_DYLIB, LC_ID_DYLIB,
+// LC_LOAD_WEAK_DYLIB or LC_REEXPORT_DYLIB load command.
+message Dylib {
+  optional string name = 1;
+  optional uint32 timestamp = 2;
+  optional uint32 compatibility_version = 3;
+  optional uint32 current_version = 4;
+}
+
 message Section {
   optional string segname = 1;
   optional string sectname = 2;
@@ -58,8 +67,10 @@ message File {
   optional uint32 reserved = 8;
   optional uint64 number_of_segments = 9;
   repeated Segment segments = 10;
-  optional uint64 entry_point = 11;
-  optional uint64 stack_size = 12;
+  // NOTE(review): taking 11 here renumbers entry_point and stack_size.
+  repeated Dylib dylibs = 11;
+  optional uint64 entry_point = 12;
+  optional uint64 stack_size = 13;
 }
 
 message Macho {
@@ -74,16 +85,18 @@ message Macho {
   optional uint32 reserved = 8;
   optional uint64 number_of_segments = 9;
   repeated Segment segments = 10;
-  optional uint64 entry_point = 11;
-  optional uint64 stack_size = 12;
+  // NOTE(review): taking 11 here renumbers every later field of Macho.
+  repeated Dylib dylibs = 11;
+  optional uint64 entry_point = 12;
+  optional uint64 stack_size = 13;
 
   // Add fields for Mach-O fat binary header
-  optional uint32 fat_magic = 13;
-  optional uint32 nfat_arch = 14;
-  repeated FatArch fat_arch = 15;
+  optional uint32 fat_magic = 14;
+  optional uint32 nfat_arch = 15;
+  repeated FatArch fat_arch = 16;
 
   // Nested Mach-O files
-  repeated File file = 16;
+  repeated File file = 17;
 }
 
 enum HEADER {