From 7fe3edec0e07f438d9b1025dd5faa24419fc528c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Mei=C3=9Fner?= Date: Tue, 22 Oct 2024 15:13:52 +0200 Subject: [PATCH] Refactor - make ELF parser modular (#622) * Factors out Elf64::parse_file_header(). * Factors out Elf64::parse_program_header_table(). * Factors out Elf64::parse_section_header_table(). * Makes program and section header scan more similar. * Moves slice_from_bytes() back in Elf64. * Removes self parameter from ELf64::get_string_in_section(). * Adds Elf64Phdr::file_range(). --- src/elf_parser/mod.rs | 265 ++++++++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 113 deletions(-) diff --git a/src/elf_parser/mod.rs b/src/elf_parser/mod.rs index 12e13766..e6795c9a 100644 --- a/src/elf_parser/mod.rs +++ b/src/elf_parser/mod.rs @@ -63,6 +63,14 @@ pub enum ElfParserError { } impl Elf64Phdr { + /// Returns the byte range the section spans in the file. + pub fn file_range(&self) -> Option> { + (self.p_type == PT_LOAD).then(|| { + let offset = self.p_offset as usize; + offset..offset.saturating_add(self.p_filesz as usize) + }) + } + /// Returns the segment virtual address range. pub fn vm_range(&self) -> Range { let addr = self.p_vaddr; @@ -138,19 +146,7 @@ pub struct Elf64<'a> { impl<'a> Elf64<'a> { /// Parse from the given byte slice pub fn parse(elf_bytes: &'a [u8]) -> Result { - let file_header_range = 0..mem::size_of::(); - let file_header_bytes = elf_bytes - .get(file_header_range.clone()) - .ok_or(ElfParserError::OutOfBounds)?; - let ptr = file_header_bytes.as_ptr(); - if (ptr as usize) - .checked_rem(mem::align_of::()) - .map(|remaining| remaining != 0) - .unwrap_or(true) - { - return Err(ElfParserError::InvalidAlignment); - } - let file_header = unsafe { &*ptr.cast::() }; + let (file_header_range, file_header) = Self::parse_file_header(elf_bytes)?; if file_header.e_ident.ei_mag != ELFMAG || file_header.e_ident.ei_class != ELFCLASS64 @@ -165,49 +161,35 @@ impl<'a> Elf64<'a> { return Err(ElfParserError::InvalidFileHeader); } - let program_header_table_range = file_header.e_phoff as usize - ..mem::size_of::() - .err_checked_mul(file_header.e_phnum as usize)? - .err_checked_add(file_header.e_phoff as usize)?; - check_that_there_is_no_overlap(&file_header_range, &program_header_table_range)?; - let program_header_table = - slice_from_bytes::(elf_bytes, program_header_table_range.clone())?; + let (program_header_table_range, program_header_table) = + Self::parse_program_header_table(elf_bytes, file_header_range.clone(), file_header)?; + + let (section_header_table_range, section_header_table) = Self::parse_section_header_table( + elf_bytes, + file_header_range.clone(), + file_header, + program_header_table_range.clone(), + )?; - let section_header_table_range = file_header.e_shoff as usize - ..mem::size_of::() - .err_checked_mul(file_header.e_shnum as usize)? - .err_checked_add(file_header.e_shoff as usize)?; - check_that_there_is_no_overlap(&file_header_range, §ion_header_table_range)?; - check_that_there_is_no_overlap(&program_header_table_range, §ion_header_table_range)?; - let section_header_table = - slice_from_bytes::(elf_bytes, section_header_table_range.clone())?; section_header_table .first() .filter(|section_header| section_header.sh_type == SHT_NULL) .ok_or(ElfParserError::InvalidSectionHeader)?; - let mut prev_program_header: Option<&Elf64Phdr> = None; + let mut vaddr = 0usize; for program_header in program_header_table { if program_header.p_type != PT_LOAD { continue; } - - if let Some(prev_program_header) = prev_program_header { - // program headers must be ascending - if program_header.p_vaddr < prev_program_header.p_vaddr { - return Err(ElfParserError::InvalidProgramHeader); - } + if (program_header.p_vaddr as usize) < vaddr { + return Err(ElfParserError::InvalidProgramHeader); } - - if program_header + vaddr = program_header .p_offset - .err_checked_add(program_header.p_filesz)? as usize - > elf_bytes.len() - { + .err_checked_add(program_header.p_filesz)? as usize; + if vaddr > elf_bytes.len() { return Err(ElfParserError::OutOfBounds); } - - prev_program_header = Some(program_header) } let mut offset = 0usize; @@ -224,10 +206,10 @@ impl<'a> Elf64<'a> { if section_range.start < offset { return Err(ElfParserError::SectionNotInOrder); } - if section_range.end > elf_bytes.len() { + offset = section_range.end; + if offset > elf_bytes.len() { return Err(ElfParserError::OutOfBounds); } - offset = section_range.end; } let section_names_section_header = (file_header.e_shstrndx != SHN_UNDEF) @@ -283,6 +265,60 @@ impl<'a> Elf64<'a> { self.dynamic_relocations_table } + /// Parses the file header. + pub fn parse_file_header( + elf_bytes: &'a [u8], + ) -> Result<(std::ops::Range, &'a Elf64Ehdr), ElfParserError> { + let file_header_range = 0..mem::size_of::(); + let file_header_bytes = elf_bytes + .get(file_header_range.clone()) + .ok_or(ElfParserError::OutOfBounds)?; + let ptr = file_header_bytes.as_ptr(); + if (ptr as usize) + .checked_rem(mem::align_of::()) + .map(|remaining| remaining != 0) + .unwrap_or(true) + { + return Err(ElfParserError::InvalidAlignment); + } + let file_header = unsafe { &*ptr.cast::() }; + Ok((file_header_range, file_header)) + } + + /// Parses the program header table. + pub fn parse_program_header_table( + elf_bytes: &'a [u8], + file_header_range: std::ops::Range, + file_header: &Elf64Ehdr, + ) -> Result<(std::ops::Range, &'a [Elf64Phdr]), ElfParserError> { + let program_header_table_range = file_header.e_phoff as usize + ..mem::size_of::() + .err_checked_mul(file_header.e_phnum as usize)? + .err_checked_add(file_header.e_phoff as usize)?; + check_that_there_is_no_overlap(&file_header_range, &program_header_table_range)?; + let program_header_table = + Self::slice_from_bytes::(elf_bytes, program_header_table_range.clone())?; + Ok((program_header_table_range, program_header_table)) + } + + /// Parses the section header table. + pub fn parse_section_header_table( + elf_bytes: &'a [u8], + file_header_range: std::ops::Range, + file_header: &Elf64Ehdr, + program_header_table_range: std::ops::Range, + ) -> Result<(std::ops::Range, &'a [Elf64Shdr]), ElfParserError> { + let section_header_table_range = file_header.e_shoff as usize + ..mem::size_of::() + .err_checked_mul(file_header.e_shnum as usize)? + .err_checked_add(file_header.e_shoff as usize)?; + check_that_there_is_no_overlap(&file_header_range, §ion_header_table_range)?; + check_that_there_is_no_overlap(&program_header_table_range, §ion_header_table_range)?; + let section_header_table = + Self::slice_from_bytes::(elf_bytes, section_header_table_range.clone())?; + Ok((section_header_table_range, section_header_table)) + } + fn parse_sections(&mut self) -> Result<(), ElfParserError> { macro_rules! section_header_by_name { ($self:expr, $section_header:expr, $section_name:expr, @@ -302,7 +338,8 @@ impl<'a> Elf64<'a> { .section_names_section_header .ok_or(ElfParserError::NoSectionNameStringTable)?; for section_header in self.section_header_table.iter() { - let section_name = self.get_string_in_section( + let section_name = Self::get_string_in_section( + self.elf_bytes, section_names_section_header, section_header.sh_name, SECTION_NAME_LENGTH_MAXIMUM, @@ -327,7 +364,8 @@ impl<'a> Elf64<'a> { .iter() .find(|program_header| program_header.p_type == PT_DYNAMIC) { - dynamic_table = self.slice_from_program_header(dynamic_program_header).ok(); + dynamic_table = + Self::slice_from_program_header(self.elf_bytes, dynamic_program_header).ok(); } // if PT_DYNAMIC does not exist or is invalid (some of our tests have this), @@ -339,7 +377,7 @@ impl<'a> Elf64<'a> { .find(|section_header| section_header.sh_type == SHT_DYNAMIC) { dynamic_table = Some( - self.slice_from_section_header(dynamic_section_header) + Self::slice_from_section_header(self.elf_bytes, dynamic_section_header) .map_err(|_| ElfParserError::InvalidDynamicSectionTable)?, ); } @@ -402,7 +440,7 @@ impl<'a> Elf64<'a> { .sh_offset } as usize; - self.slice_from_bytes(offset..offset.err_checked_add(size)?) + Self::slice_from_bytes(self.elf_bytes, offset..offset.err_checked_add(size)?) .map(Some) .map_err(|_| ElfParserError::InvalidDynamicSectionTable) } @@ -425,7 +463,7 @@ impl<'a> Elf64<'a> { /// Query a single string from a section which is marked as SHT_STRTAB pub fn get_string_in_section( - &self, + elf_bytes: &'a [u8], section_header: &Elf64Shdr, offset_in_section: Elf64Word, maximum_length: usize, @@ -439,8 +477,7 @@ impl<'a> Elf64<'a> { ..(section_header.sh_offset as usize) .err_checked_add(section_header.sh_size as usize)? .min(offset_in_file.err_checked_add(maximum_length)?); - let unterminated_string_bytes = self - .elf_bytes + let unterminated_string_bytes = elf_bytes .get(string_range) .ok_or(ElfParserError::OutOfBounds)?; unterminated_string_bytes @@ -457,7 +494,8 @@ impl<'a> Elf64<'a> { /// Returns the string corresponding to the given `sh_name` pub fn section_name(&self, sh_name: Elf64Word) -> Result<&'a [u8], ElfParserError> { - self.get_string_in_section( + Self::get_string_in_section( + self.elf_bytes, self.section_names_section_header .ok_or(ElfParserError::NoSectionNameStringTable)?, sh_name, @@ -467,7 +505,8 @@ impl<'a> Elf64<'a> { /// Returns the name of the `st_name` symbol pub fn symbol_name(&self, st_name: Elf64Word) -> Result<&'a [u8], ElfParserError> { - self.get_string_in_section( + Self::get_string_in_section( + self.elf_bytes, self.symbol_names_section_header .ok_or(ElfParserError::NoStringTable)?, st_name, @@ -484,7 +523,8 @@ impl<'a> Elf64<'a> { /// Returns the name of the `st_name` dynamic symbol pub fn dynamic_symbol_name(&self, st_name: Elf64Word) -> Result<&'a [u8], ElfParserError> { - self.get_string_in_section( + Self::get_string_in_section( + self.elf_bytes, self.dynamic_symbol_names_section_header .ok_or(ElfParserError::NoDynamicStringTable)?, st_name, @@ -501,18 +541,19 @@ impl<'a> Elf64<'a> { return Err(ElfParserError::InvalidSectionHeader); } - self.slice_from_section_header(section_header) + Self::slice_from_section_header(self.elf_bytes, section_header) } /// Returns the `&[T]` contained in the data described by the given program /// header pub fn slice_from_program_header( - &self, + bytes: &'a [u8], &Elf64Phdr { p_offset, p_filesz, .. }: &Elf64Phdr, ) -> Result<&'a [T], ElfParserError> { - self.slice_from_bytes( + Self::slice_from_bytes( + bytes, (p_offset as usize)..(p_offset as usize).err_checked_add(p_filesz as usize)?, ) } @@ -520,19 +561,50 @@ impl<'a> Elf64<'a> { /// Returns the `&[T]` contained in the section data described by the given /// section header pub fn slice_from_section_header( - &self, + bytes: &'a [u8], &Elf64Shdr { sh_offset, sh_size, .. }: &Elf64Shdr, ) -> Result<&'a [T], ElfParserError> { - self.slice_from_bytes( + Self::slice_from_bytes( + bytes, (sh_offset as usize)..(sh_offset as usize).err_checked_add(sh_size as usize)?, ) } - /// Returns the `&[T]` contained at `elf_bytes[offset..size]` - fn slice_from_bytes(&self, range: Range) -> Result<&'a [T], ElfParserError> { - slice_from_bytes(self.elf_bytes, range) + /// Returns the `&[T]` contained at `bytes[range]` + fn slice_from_bytes( + bytes: &[u8], + range: Range, + ) -> Result<&[T], ElfParserError> { + if range + .len() + .checked_rem(mem::size_of::()) + .map(|remainder| remainder != 0) + .unwrap_or(true) + { + return Err(ElfParserError::InvalidSize); + } + + let bytes = bytes + .get(range.clone()) + .ok_or(ElfParserError::OutOfBounds)?; + + let ptr = bytes.as_ptr(); + if (ptr as usize) + .checked_rem(mem::align_of::()) + .map(|remaining| remaining != 0) + .unwrap_or(true) + { + return Err(ElfParserError::InvalidAlignment); + } + + Ok(unsafe { + slice::from_raw_parts( + ptr.cast(), + range.len().checked_div(mem::size_of::()).unwrap_or(0), + ) + }) } fn program_header_for_vaddr( @@ -559,16 +631,14 @@ impl fmt::Debug for Elf64<'_> { writeln!(f, "{program_header:#X?}")?; } for section_header in self.section_header_table.iter() { - let section_name = self - .get_string_in_section( - self.section_names_section_header.unwrap(), - section_header.sh_name, - SECTION_NAME_LENGTH_MAXIMUM, - ) - .and_then(|name| { - std::str::from_utf8(name).map_err(|_| ElfParserError::InvalidString) - }) - .unwrap(); + let section_name = Self::get_string_in_section( + self.elf_bytes, + self.section_names_section_header.unwrap(), + section_header.sh_name, + SECTION_NAME_LENGTH_MAXIMUM, + ) + .and_then(|name| std::str::from_utf8(name).map_err(|_| ElfParserError::InvalidString)) + .unwrap(); writeln!(f, "{section_name}")?; writeln!(f, "{section_header:#X?}")?; } @@ -577,16 +647,16 @@ impl fmt::Debug for Elf64<'_> { writeln!(f, "{symbol_table:#X?}")?; for symbol in symbol_table.iter() { if symbol.st_name != 0 { - let symbol_name = self - .get_string_in_section( - self.symbol_names_section_header.unwrap(), - symbol.st_name, - SYMBOL_NAME_LENGTH_MAXIMUM, - ) - .and_then(|name| { - std::str::from_utf8(name).map_err(|_| ElfParserError::InvalidString) - }) - .unwrap(); + let symbol_name = Self::get_string_in_section( + self.elf_bytes, + self.symbol_names_section_header.unwrap(), + symbol.st_name, + SYMBOL_NAME_LENGTH_MAXIMUM, + ) + .and_then(|name| { + std::str::from_utf8(name).map_err(|_| ElfParserError::InvalidString) + }) + .unwrap(); writeln!(f, "{symbol_name}")?; } } @@ -595,37 +665,6 @@ impl fmt::Debug for Elf64<'_> { } } -fn slice_from_bytes(bytes: &[u8], range: Range) -> Result<&[T], ElfParserError> { - if range - .len() - .checked_rem(mem::size_of::()) - .map(|remainder| remainder != 0) - .unwrap_or(true) - { - return Err(ElfParserError::InvalidSize); - } - - let bytes = bytes - .get(range.clone()) - .ok_or(ElfParserError::OutOfBounds)?; - - let ptr = bytes.as_ptr(); - if (ptr as usize) - .checked_rem(mem::align_of::()) - .map(|remaining| remaining != 0) - .unwrap_or(true) - { - return Err(ElfParserError::InvalidAlignment); - } - - Ok(unsafe { - slice::from_raw_parts( - ptr.cast(), - range.len().checked_div(mem::size_of::()).unwrap_or(0), - ) - }) -} - impl From for ElfParserError { fn from(_: ArithmeticOverflow) -> ElfParserError { ElfParserError::OutOfBounds