diff --git a/yara-x/src/modules/pe/parser.rs b/yara-x/src/modules/pe/parser.rs index 1da3075b5..f951c1af3 100644 --- a/yara-x/src/modules/pe/parser.rs +++ b/yara-x/src/modules/pe/parser.rs @@ -1,12 +1,14 @@ use std::cell::OnceCell; use std::cmp::min; use std::collections::VecDeque; +use std::default::Default; use std::iter::zip; use std::str::{from_utf8, FromStr}; use bstr::{BStr, ByteSlice}; use byteorder::{ByteOrder, LE}; use indexmap::IndexMap; +use itertools::Itertools; use memchr::memmem; use nom::branch::{alt, permutation}; use nom::bytes::complete::{take, take_till}; @@ -73,6 +75,9 @@ pub struct PE<'a> { /// computing the imphash. imported_dlls: OnceCell>>>>, + /// Export information about this PE file. + exports: OnceCell>>, + /// DOS header already parsed. pub dos_hdr: DOSHeader, @@ -326,15 +331,18 @@ impl<'a> PE<'a> { .unwrap_or_default() } - /// Returns the data associated to a given directory entry. + /// Returns the RVA, size and data associated to a given directory entry. /// - /// Each directory entry has a RVA and a size. This function translates the - /// RVA into a file offset and returns the chunk of file that starts at - /// that offset and has the size indicated by the directory entry. + /// This function translates the RVA into a file offset and returns the + /// chunk of file that starts at that offset and has the size indicated by + /// the directory entry. /// /// Returns `None` if the PE is corrupted in some way that prevents the /// data from being found. - pub fn get_dir_entry_data(&self, index: usize) -> Option<&'a [u8]> { + pub fn get_dir_entry_data( + &self, + index: usize, + ) -> Option<(u32, u32, &'a [u8])> { // Nobody should call this function with an index greater // than MAX_DIR_ENTRIES. debug_assert!(index < Self::MAX_DIR_ENTRIES); @@ -362,7 +370,9 @@ impl<'a> PE<'a> { start.saturating_add(dir_entry.size as usize), ); - self.data.get(start..end) + let data = self.data.get(start..end)?; + + Some((dir_entry.addr, dir_entry.size, data)) } pub fn get_imports( @@ -379,6 +389,10 @@ impl<'a> PE<'a> { .map(|(name, funcs)| (*name, funcs.as_slice())), ) } + + pub fn get_exports(&self) -> Option<&ExportInfo<'a>> { + self.exports.get_or_init(|| self.parse_exports()).as_ref() + } } impl<'a> PE<'a> { @@ -402,6 +416,7 @@ impl<'a> PE<'a> { const MAX_PE_SECTIONS: usize = 96; const MAX_PE_IMPORTS: usize = 16384; + const MAX_PE_EXPORTS: usize = 16384; const MAX_PE_RESOURCES: usize = 65535; const MAX_DIR_ENTRIES: usize = 16; @@ -1140,7 +1155,7 @@ impl<'a> PE<'a> { } fn parse_resources(&self) -> Option>> { - let rsrc_section = + let (_, _, rsrc_section) = self.get_dir_entry_data(Self::IMAGE_DIRECTORY_ENTRY_RESOURCE)?; // Resources are stored in tree structure with three levels. Non-leaf @@ -1271,7 +1286,7 @@ impl<'a> PE<'a> { } fn parse_dbg(&self) -> Option<&'a str> { - let dbg_section = + let (_, _, dbg_section) = self.get_dir_entry_data(Self::IMAGE_DIRECTORY_ENTRY_DEBUG)?; let entries = many0(Self::parse_dbg_dir_entry)(dbg_section) @@ -1410,7 +1425,7 @@ impl<'a> PE<'a> { fn parse_imports( &self, ) -> Option>>> { - let imports_section = + let (_, _, imports_section) = self.get_dir_entry_data(Self::IMAGE_DIRECTORY_ENTRY_IMPORT)?; // Parse import descriptors until finding one that is empty (filled @@ -1533,6 +1548,161 @@ impl<'a> PE<'a> { )(input) } + fn parse_exports(&self) -> Option> { + let (exports_rva, exports_size, exports_data) = + self.get_dir_entry_data(Self::IMAGE_DIRECTORY_ENTRY_EXPORT)?; + + // Parse the IMAGE_EXPORT_DIRECTORY structure. + let (_, exports) = Self::parse_exports_dir_entry(exports_data).ok()?; + + let num_exports = + min(exports.number_of_functions as usize, Self::MAX_PE_EXPORTS); + + // The IMAGE_EXPORT_DIRECTORY structure points to three arrays. The + // only required array is the Export Address Table (EAT), which is an + // array of function pointers that contain the address (RVA) of an + // exported function. The `address_of_functions` field contains the + // RVA for this array. There are as many exported functions as entries + // in the `address_of_functions` array. The size of this array is + // indicated by the `number_of_functions` field. + // + // The purpose of the other two arrays is associating a name to + // the imported functions, but not all functions have an associated + // name. Functions that are exported only by ordinal don't have an + // associated entry in these arrays. + // + // Let's illustrate it with an example: + // + // base: 5 + // address_of_functions: [ 0x00000011 | 0x00000022 | 0x00000033 ] + // address_of_name_ordinals: [ 0x0000 | 0x0002 | 0x0001 ] + // address_of_names: [ 0x00000044 | 0x00000055 ] + // + // The function at RVA 0x00000011 (index 0) has ordinal 5 (base+index). + // The index can be found at position 0 in the address_of_name_ordinals + // array. Using 0 to index into the address_of_names array gives us an + // RVA (0x00000044) where the function's name is located. + // + // The function at RVA 0x00000022 (index 1) has ordinal 6 (base+index). + // The index can be found at position 2 in the address_of_name_ordinals + // array. 2 is out of bounds for address_of_names, so this function is + // exported only by ordinal, not by name. + // + // The function at RVA 0x00000033 (index 2) has ordinal 7 (base+index). + // The index can be found in position 1 in the address_of_name_ordinals. + // array. Using 1 to index into the address_of_names array gives us an + // RVA (0x00000055) which we can follow to get the name. + // + // If the RVA from the address_of_functions is within the export + // directory it is a forwarder RVA and points to a NULL terminated + // ASCII string. + + let func_rvas = self.parse_at_rva( + exports.address_of_functions, + count(le_u32, num_exports), + )?; + + let names = self.parse_at_rva( + exports.address_of_names, + count(le_u32, exports.number_of_names as usize), + )?; + + let name_ordinals = self.parse_at_rva( + exports.address_of_name_ordinals, + count(le_u16, exports.number_of_names as usize), + )?; + + // Create a vector with one item per exported function. Items in the + // array initially have function RVA and ordinal only. + let mut exported_funcs: Vec<_> = func_rvas + .iter() + .enumerate() + .map(|(i, rva)| ExportedFunc { + rva: *rva, + ordinal: exports.base + i as u32, + ..Default::default() + }) + .collect(); + + // Set the name field for each exported function, if they are exported + // by name. + for f in exported_funcs.iter_mut() { + if let Some((idx, _)) = + name_ordinals.iter().find_position(|ordinal| { + **ordinal as u32 == f.ordinal - exports.base + }) + { + if let Some(name_rva) = names.get(idx) { + f.name = self.str_at_rva(*name_rva); + } + } + + // If the function's RVA is within the exports section (as given + // by the RVA and size fields in the directory entry), this is a + // forwarded function. In such cases the function's RVA is not + // really pointing to the function, but to a ASCII string that + // contains the DLL and function to which this export is forwarded. + if (exports_rva..exports_rva + exports_size).contains(&f.rva) { + f.forward_name = self.str_at_rva(f.rva); + } else { + f.offset = self.rva_to_offset(f.rva); + } + } + + Some(ExportInfo { + dll_name: self.dll_name_at_rva(exports.name), + timestamp: exports.timestamp, + functions: exported_funcs, + }) + } + + fn parse_exports_dir_entry( + input: &[u8], + ) -> IResult<&[u8], ExportsDirEntry> { + map( + tuple(( + le_u32, // characteristics + le_u32, // timestamp + le_u16, // major_version + le_u16, // minor_version + le_u32, // name + le_u32, // base + le_u32, // number_of_functions + le_u32, // number_of_names + le_u32, // address_of_functions + le_u32, // address_of_names + le_u32, // address_of_name_ordinals + )), + |( + characteristics, + timestamp, + major_version, + minor_version, + name, + base, + number_of_functions, + number_of_names, + address_of_functions, + address_of_names, + address_of_name_ordinals, + )| { + ExportsDirEntry { + characteristics, + timestamp, + major_version, + minor_version, + name, + base, + number_of_functions, + number_of_names, + address_of_functions, + address_of_names, + address_of_name_ordinals, + } + }, + )(input) + } + fn parse_at_rva(&self, rva: u32, mut parser: P) -> Option where P: FnMut(&'a [u8]) -> IResult<&'a [u8], T>, @@ -1541,9 +1711,13 @@ impl<'a> PE<'a> { parser(data).map(|(_, result)| result).ok() } - fn dll_name_at_rva(&self, rva: u32) -> Option<&'a str> { + fn str_at_rva(&self, rva: u32) -> Option<&'a str> { let dll_name = self.parse_at_rva(rva, take_till(|c| c == 0))?; - let dll_name = from_utf8(dll_name).ok()?; + from_utf8(dll_name).ok() + } + + fn dll_name_at_rva(&self, rva: u32) -> Option<&'a str> { + let dll_name = self.str_at_rva(rva)?; for c in dll_name.chars() { if matches!(c, ' ' | '"' | '*' | '<' | '>' | '?' | '|') { @@ -1644,7 +1818,28 @@ impl From> for pe::PE { result .resources .extend(pe.get_resources().iter().map(pe::Resource::from)); + + + let mut num_imported_funcs = 0; + if let Some(imports) = pe.get_imports() { + for (dll_name, functions) in imports { + let mut import = pe::Import::new(); + import.library_name = Some(dll_name.to_owned()); + import.functions = functions.iter().map(pe::Function::from).collect(); + num_imported_funcs += import.functions.len(); + result.import_details.push(import); + } + } + + result.set_number_of_imported_functions(num_imported_funcs as u64); + + if let Some(exports) = pe.get_exports() { + result.dll_name = exports.dll_name.map(|name| name.to_owned()); + result.export_timestamp = Some(exports.timestamp); + result.export_details.extend(exports.functions.iter().map(pe::Export::from)); + } + for (key, value) in pe.get_version_info() { let mut kv = pe::KeyValue::new(); kv.key = Some(key.to_owned()); @@ -1658,20 +1853,6 @@ impl From> for pe::PE { // result.version_info.insert(key.to_owned(), value.to_owned()); } - let mut num_imported_funcs = 0; - - if let Some(imports) = pe.get_imports() { - for (dll_name, functions) in imports { - let mut import = pe::Import::new(); - import.library_name = Some(dll_name.to_owned()); - import.functions = functions.iter().map(pe::Function::from).collect(); - num_imported_funcs += import.functions.len(); - result.import_details.push(import); - } - } - - result.set_number_of_imported_functions(num_imported_funcs as u64); - if let Some(rich_header) = pe.get_rich_header() { result.rich_signature = MessageField::some(pe::RichSignature { offset: Some(rich_header.offset.try_into().unwrap()), @@ -1905,6 +2086,47 @@ impl From<&ImportedFunc<'_>> for pe::Function { } } +pub struct ExportInfo<'a> { + dll_name: Option<&'a str>, + timestamp: u32, + functions: Vec>, +} + +#[derive(Default)] +pub struct ExportedFunc<'a> { + rva: u32, + offset: Option, + ordinal: u32, + name: Option<&'a str>, + forward_name: Option<&'a str>, +} + +impl From<&ExportedFunc<'_>> for pe::Export { + fn from(value: &ExportedFunc<'_>) -> Self { + let mut exp = pe::Export::new(); + exp.name = value.name.map(|name| name.to_owned()); + exp.ordinal = Some(value.ordinal); + exp.rva = Some(value.rva); + exp.offset = value.offset; + exp.forward_name = value.forward_name.map(|name| name.to_owned()); + exp + } +} + +pub struct ExportsDirEntry { + characteristics: u32, + timestamp: u32, + major_version: u16, + minor_version: u16, + name: u32, + base: u32, + number_of_functions: u32, + number_of_names: u32, + address_of_functions: u32, + address_of_names: u32, + address_of_name_ordinals: u32, +} + /// Represents a resource in the PE. pub struct Resource<'a> { rsrc_id: ResourceId<'a>, diff --git a/yara-x/src/modules/pe/tests/testdata/079a472d22290a94ebb212aa8015cdc8dd28a968c6b4d3b88acdd58ce2d3b885.out b/yara-x/src/modules/pe/tests/testdata/079a472d22290a94ebb212aa8015cdc8dd28a968c6b4d3b88acdd58ce2d3b885.out index 5da6c27ca..1afe3597e 100644 --- a/yara-x/src/modules/pe/tests/testdata/079a472d22290a94ebb212aa8015cdc8dd28a968c6b4d3b88acdd58ce2d3b885.out +++ b/yara-x/src/modules/pe/tests/testdata/079a472d22290a94ebb212aa8015cdc8dd28a968c6b4d3b88acdd58ce2d3b885.out @@ -26,6 +26,8 @@ base_of_code: 4096 base_of_data: 12288 entry_point: 2605 entry_point_raw: 5677 +dll_name: "CUSTPROF.dll" +export_timestamp: 1528213185 section_alignment: 4096 file_alignment: 512 loader_flags: 0 @@ -450,6 +452,36 @@ import_details { rva: 12372 } } +export_details { + name: "CP_PutItem" + ordinal: 200 + rva: 4204 + offset: 1132 +} +export_details { + name: "CP_GetItem" + ordinal: 201 + rva: 4414 + offset: 1342 +} +export_details { + name: "CP_DelItem" + ordinal: 202 + rva: 4096 + offset: 1024 +} +export_details { + name: "CP_GetTaxMap" + ordinal: 203 + rva: 4186 + offset: 1114 +} +export_details { + name: "_DllMain@12" + ordinal: 204 + rva: 4624 + offset: 1552 +} overlay { offset: 10752 size: 6048 diff --git a/yara-x/src/modules/pe/tests/testdata/23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5.out b/yara-x/src/modules/pe/tests/testdata/23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5.out index 32bcb06a3..ef22e75a2 100644 --- a/yara-x/src/modules/pe/tests/testdata/23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5.out +++ b/yara-x/src/modules/pe/tests/testdata/23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5.out @@ -25,6 +25,8 @@ checksum: 57102 base_of_code: 4096 entry_point: 1920 entry_point_raw: 4992 +dll_name: "mtxex.dll" +export_timestamp: 1827812126 section_alignment: 4096 file_alignment: 512 loader_flags: 0 @@ -376,6 +378,30 @@ import_details { rva: 8608 } } +export_details { + name: "DllGetClassObject" + ordinal: 1 + rva: 4144 + offset: 1072 +} +export_details { + name: "GetObjectContext" + ordinal: 2 + rva: 9853 + forward_name: "COMSVCS.GetObjectContext" +} +export_details { + name: "MTSCreateActivity" + ordinal: 3 + rva: 9896 + forward_name: "COMSVCS.MTSCreateActivity" +} +export_details { + name: "SafeRef" + ordinal: 4 + rva: 9930 + forward_name: "COMSVCS.SafeRef" +} overlay { offset: 0 size: 0 diff --git a/yara-x/src/modules/pe/tests/testdata/2d80c403b5c50f8bbacb65f58e7a19f272c62d1889216b7a6f1141571ec12649.out b/yara-x/src/modules/pe/tests/testdata/2d80c403b5c50f8bbacb65f58e7a19f272c62d1889216b7a6f1141571ec12649.out index 1dc839e7f..521d361d2 100644 --- a/yara-x/src/modules/pe/tests/testdata/2d80c403b5c50f8bbacb65f58e7a19f272c62d1889216b7a6f1141571ec12649.out +++ b/yara-x/src/modules/pe/tests/testdata/2d80c403b5c50f8bbacb65f58e7a19f272c62d1889216b7a6f1141571ec12649.out @@ -26,6 +26,8 @@ base_of_code: 40960 base_of_data: 49152 entry_point: 6256 entry_point_raw: 46704 +dll_name: "socks55.dll" +export_timestamp: 1274346651 section_alignment: 4096 file_alignment: 512 loader_flags: 0 @@ -199,6 +201,191 @@ import_details { rva: 49320 } } +export_details { + name: "AcceptThread@4" + ordinal: 1 + rva: 7308 +} +export_details { + name: "AuthLogin" + ordinal: 2 + rva: 24660 +} +export_details { + name: "AuthPass" + ordinal: 3 + rva: 24772 +} +export_details { + name: "CreateConnectStruct" + ordinal: 4 + rva: 4571 +} +export_details { + name: "Get_Reg_SZ" + ordinal: 5 + rva: 8023 +} +export_details { + name: "Socks5Accept" + ordinal: 6 + rva: 7102 +} +export_details { + name: "Socks5Auth" + ordinal: 7 + rva: 6747 +} +export_details { + name: "Socks5CmdIsSupported" + ordinal: 8 + rva: 5438 +} +export_details { + name: "Socks5GetCmd" + ordinal: 9 + rva: 6111 +} +export_details { + name: "Socks5SendCode" + ordinal: 10 + rva: 5135 +} +export_details { + name: "Socks5ServConnect" + ordinal: 11 + rva: 5201 +} +export_details { + name: "SocksPipe@4" + ordinal: 12 + rva: 4836 +} +export_details { + name: "Write_REG_SZ" + ordinal: 13 + rva: 10062 +} +export_details { + name: "_malloc" + ordinal: 14 + rva: 4541 +} +export_details { + name: "add_system_direcroty" + ordinal: 15 + rva: 8713 +} +export_details { + name: "autostart_bot" + ordinal: 16 + rva: 9087 +} +export_details { + name: "copy_autoinf" + ordinal: 17 + rva: 10246 +} +export_details { + name: "copy_filez" + ordinal: 18 + rva: 11372 +} +export_details { + name: "create_thread" + ordinal: 19 + rva: 4480 +} +export_details { + name: "filetyt" + ordinal: 20 + rva: 10011 +} +export_details { + name: "get_dword" + ordinal: 21 + rva: 11164 +} +export_details { + name: "hDllInstance" + ordinal: 22 + rva: 24724 +} +export_details { + name: "mutex_check" + ordinal: 23 + rva: 10774 +} +export_details { + name: "name_exe" + ordinal: 24 + rva: 24740 +} +export_details { + name: "rot13" + ordinal: 25 + rva: 7974 +} +export_details { + name: "rot13c" + ordinal: 26 + rva: 7684 +} +export_details { + name: "run_another@4" + ordinal: 27 + rva: 11944 +} +export_details { + name: "run_flash@4" + ordinal: 28 + rva: 11503 +} +export_details { + name: "run_process@4" + ordinal: 29 + rva: 10848 +} +export_details { + name: "run_reestr@4" + ordinal: 30 + rva: 10680 +} +export_details { + name: "socks5_exec" + ordinal: 31 + rva: 5502 +} +export_details { + name: "sread" + ordinal: 32 + rva: 4749 +} +export_details { + name: "sss_rans" + ordinal: 33 + rva: 9378 +} +export_details { + name: "swrite" + ordinal: 34 + rva: 4662 +} +export_details { + name: "xproxy_th@4" + ordinal: 35 + rva: 8224 +} +export_details { + name: "xsocks5" + ordinal: 36 + rva: 8824 +} +export_details { + name: "xstrchr" + ordinal: 37 + rva: 7640 +} overlay { offset: 0 size: 0 diff --git a/yara-x/src/modules/protos/pe.proto b/yara-x/src/modules/protos/pe.proto index a69401a32..1f8e2fd7e 100644 --- a/yara-x/src/modules/protos/pe.proto +++ b/yara-x/src/modules/protos/pe.proto @@ -32,44 +32,48 @@ message PE { // Entry point as it appears in the PE header (RVA). optional uint32 entry_point_raw = 16; - optional uint32 section_alignment = 17; - optional uint32 file_alignment = 18; - optional uint32 loader_flags = 19; + optional string dll_name = 17; + optional uint32 export_timestamp = 18; - optional uint32 size_of_optional_header = 20; - optional uint32 size_of_code = 21; - optional uint32 size_of_initialized_data = 22; - optional uint32 size_of_uninitialized_data = 23; - optional uint32 size_of_image = 24; - optional uint32 size_of_headers = 25; + optional uint32 section_alignment = 19; + optional uint32 file_alignment = 20; + optional uint32 loader_flags = 21; - optional uint64 size_of_stack_reserve = 26; - optional uint64 size_of_stack_commit = 27; - optional uint64 size_of_heap_reserve = 28; - optional uint64 size_of_heap_commit = 29; + optional uint32 size_of_optional_header = 22; + optional uint32 size_of_code = 23; + optional uint32 size_of_initialized_data = 24; + optional uint32 size_of_uninitialized_data = 25; + optional uint32 size_of_image = 26; + optional uint32 size_of_headers = 27; - optional uint32 pointer_to_symbol_table = 30; - optional uint32 number_of_symbols = 31; - optional uint32 number_of_rva_and_sizes = 32; - optional uint32 win32_version_value = 33; + optional uint64 size_of_stack_reserve = 28; + optional uint64 size_of_stack_commit = 29; + optional uint64 size_of_heap_reserve = 30; + optional uint64 size_of_heap_commit = 31; - map version_info = 34; - repeated KeyValue version_info_list = 35; + optional uint32 pointer_to_symbol_table = 32; + optional uint32 number_of_symbols = 33; + optional uint32 number_of_rva_and_sizes = 34; + optional uint32 win32_version_value = 35; - optional uint32 number_of_sections = 36; - optional uint64 number_of_imported_functions = 37; + map version_info = 36; + repeated KeyValue version_info_list = 37; - optional RichSignature rich_signature = 38; - optional string pdb_path = 39; + optional uint32 number_of_sections = 38; + optional uint64 number_of_imported_functions = 39; - repeated Section sections = 40; - repeated DirEntry data_directories = 41; - repeated Resource resources = 42; - repeated Import import_details = 43; + optional RichSignature rich_signature = 40; + optional string pdb_path = 41; + + repeated Section sections = 42; + repeated DirEntry data_directories = 43; + repeated Resource resources = 44; + repeated Import import_details = 45; + repeated Export export_details = 46; // TODO: implement resource_version? - optional Overlay overlay = 44; + optional Overlay overlay = 47; } message Version { @@ -104,11 +108,18 @@ message Import { repeated Function functions = 2; } +message Export { + optional string name= 1; + required uint32 ordinal = 2; + required uint32 rva = 3; + optional uint32 offset = 4; + optional string forward_name = 5; +} + message Function { optional string name = 1; optional uint32 ordinal = 2; required uint32 rva = 3; - } /// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types?redirectedfrom=MSDN