From 0c02a3b79efef471cf9843632330ebed0507f4fe Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 24 Aug 2023 18:45:20 -0700 Subject: [PATCH] Add basic COFF (.obj) file support --- cle/__init__.py | 2 + cle/backends/__init__.py | 2 + cle/backends/coff.py | 568 +++++++++++++++++++++++++++++++++++++++ tests/test_coff.py | 36 +++ 4 files changed, 608 insertions(+) create mode 100644 cle/backends/coff.py create mode 100644 tests/test_coff.py diff --git a/cle/__init__.py b/cle/__init__.py index ab1b072b..c63f6a3c 100644 --- a/cle/__init__.py +++ b/cle/__init__.py @@ -19,6 +19,7 @@ BackedCGC, Backend, Blob, + Coff, ELFCore, ExceptionHandling, FunctionHint, @@ -81,6 +82,7 @@ "BackedCGC", "Backend", "Blob", + "Coff", "ELFCore", "ExceptionHandling", "FunctionHint", diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index fe9bfe3e..62d31b86 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -1,6 +1,7 @@ from .backend import ALL_BACKENDS, Backend, ExceptionHandling, FunctionHint, FunctionHintSource, register_backend from .blob import Blob from .cgc import CGC, BackedCGC +from .coff import Coff from .elf import ELF, ELFCore, MetaELF from .ihex import Hex from .java.apk import Apk @@ -32,6 +33,7 @@ "ELFCore", "MetaELF", "PE", + "Coff", "Blob", "CGC", "BackedCGC", diff --git a/cle/backends/coff.py b/cle/backends/coff.py new file mode 100644 index 00000000..dd991c14 --- /dev/null +++ b/cle/backends/coff.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python3 +""" +Basic COFF object loader based on https://docs.microsoft.com/en-us/windows/win32/debug/pe-format +""" +import ctypes +import logging +import struct +from enum import IntEnum, IntFlag +from typing import Mapping, Optional, Sequence + +import archinfo + +from .backend import Backend, register_backend +from .region import Section, Segment +from .relocation import Relocation +from .symbol import Symbol, SymbolType + +log = logging.getLogger(__name__) + + +class StructurePrintMixin: + """ + A simple mixin to __repr__ ctypes structures + """ + + _enums = None + + def __repr__(self): + return self.dumps() + + def dumps(self, indent: int = 0) -> str: + """Pretty-print all fields and values of the structure, return a string""" + # FIXME: Doesn't work with inherited fields + s = "" + max_name_len = max(len(name) for name, _ in self._fields_) + for fname, ftype in self._fields_: + fval = getattr(self, fname) + s += " " * indent + ("%s: " % fname).ljust(max_name_len + 2) + if self._enums and fname in self._enums: + try: + s += repr(self._enums[fname](fval)) + "\n" + continue + except ValueError: + pass + uint_types = {ctypes.c_uint8, ctypes.c_int16, ctypes.c_uint16, ctypes.c_uint32} + if ftype in uint_types: + s += "0x%x" % fval + elif issubclass(ftype, ctypes.Array) and ftype._type_ in uint_types: + fmt, wrap = { + ctypes.c_uint8: ("%02x ", 16), + ctypes.c_uint16: ("%04x ", 8), + ctypes.c_uint32: ("%08x ", 4), + }[ftype._type_] + for i in range(ftype._length_): + if i % wrap == 0: + s += "\n" + " " * (indent + 2) + s += fmt % fval[i] + else: + s += "?" + s += "\n" + return s.rstrip() # Trim trailing newline + + +class IMAGE_FILE_MACHINE(IntEnum): + """ + Machine Types + """ + + I386 = 0x14C + AMD64 = 0x8664 + + +class CoffFileHeader(ctypes.LittleEndianStructure, StructurePrintMixin): + """ + COFF File Header + """ + + _pack_ = 1 + _fields_ = [ + ("Machine", ctypes.c_uint16), + ("NumberOfSections", ctypes.c_uint16), + ("TimeDateStamp", ctypes.c_uint32), + ("PointerToSymbolTable", ctypes.c_uint32), + ("NumberOfSymbols", ctypes.c_uint32), + ("SizeOfOptionalHeader", ctypes.c_uint16), + ("Characteristics", ctypes.c_uint16), + ] + _enums = {"Machine": IMAGE_FILE_MACHINE} + + +class IMAGE_SCN(IntFlag): + """ + Section Flags (Characteristics field) + """ + + MEM_EXECUTE = 0x20000000 + MEM_READ = 0x40000000 + MEM_WRITE = 0x80000000 + CNT_UNINITIALIZED_DATA = 0x00000080 + + +class CoffSectionTableEntry(ctypes.LittleEndianStructure, StructurePrintMixin): + """ + COFF Section Header + """ + + _pack_ = 1 + _fields_ = [ + ("Name", ctypes.c_uint8 * 8), + ("VirtualSize", ctypes.c_uint32), + ("VirtualAddress", ctypes.c_uint32), + ("SizeOfRawData", ctypes.c_uint32), + ("PointerToRawData", ctypes.c_uint32), + ("PointerToRelocations", ctypes.c_uint32), + ("PointerToLinenumbers", ctypes.c_uint32), + ("NumberOfRelocations", ctypes.c_uint16), + ("NumberOfLinenumbers", ctypes.c_uint16), + ("Characteristics", ctypes.c_uint32), + ] + + +class IMAGE_SYM_CLASS(IntEnum): + """ + Symbol Storage Class + """ + + EXTERNAL = 2 + STATIC = 3 + LABEL = 6 + FUNCTION = 101 + + +class CoffSymbolTableEntry(ctypes.LittleEndianStructure, StructurePrintMixin): + """ + COFF Symbol Table Entry + """ + + _pack_ = 1 + _fields_ = [ + ("Name", ctypes.c_uint8 * 8), + ("Value", ctypes.c_uint32), + ("SectionNumber", ctypes.c_int16), + ("Type", ctypes.c_uint16), + ("StorageClass", ctypes.c_uint8), + ("NumberOfAuxSymbols", ctypes.c_uint8), + ] + _enums = {"StorageClass": IMAGE_SYM_CLASS} + + +class IMAGE_REL_I386(IntEnum): + """ + i386 Relocation Types + """ + + DIR32 = 0x0006 + DIR32NB = 0x0007 + REL32 = 0x0014 + SECTION = 0x000A + SECREL = 0x000B + + +class IMAGE_REL_AMD64(IntEnum): + """ + AMD64 Relocation Types + """ + + ADDR64 = 0x0001 + ADDR32NB = 0x0003 + REL32 = 0x0004 + SECTION = 0x000A + SECREL = 0x000B + + +class CoffRelocationTableEntry(ctypes.LittleEndianStructure, StructurePrintMixin): + """ + COFF Relocations + """ + + _pack_ = 1 + _fields_ = [ + ("VirtualAddress", ctypes.c_uint32), + ("SymbolTableIndex", ctypes.c_uint32), + ("Type", ctypes.c_uint16), + ] + + +class CoffParser: + """ + Parses COFF object files. + """ + + data: bytes + header: CoffFileHeader + sections: Sequence[CoffSectionTableEntry] + relocations: Sequence[Sequence[CoffRelocationTableEntry]] + symbols: Sequence[CoffSymbolTableEntry] + + # Note: Symbols are uniquely identified by their index. It is possible for multiple symbols to have the same name so + # in idx_to_symbol_name and symbol_name_to_idx, numeric suffixes are appended when necessary. To get the true name + # of a symbol at index `symbol_idx`, call get_symbol_name(symbol_idx, true_name=True). + idx_to_symbol_name: Mapping[int, str] + symbol_name_to_idx: Mapping[str, int] + + def __init__(self, data: bytes): + if data.startswith(b"\x00\x00\xff\xff"): + raise ValueError( + "This object file appears to have been compiled with whole program optimization (/GL flag)" + " and cannot be parsed by this library" + ) + self.data: bytes = data + self._parse() + + def _parse(self) -> None: + self.header = CoffFileHeader.from_buffer_copy(self.data) + log.debug("Parsed header:\n%s", self.header.dumps()) + + if self.header.Machine not in { + IMAGE_FILE_MACHINE.I386, + IMAGE_FILE_MACHINE.AMD64, + }: + raise NotImplementedError("Unsupported machine type") + + strings_offset = ( + self.header.PointerToSymbolTable + ctypes.sizeof(CoffSymbolTableEntry) * self.header.NumberOfSymbols + ) + strings_size = struct.unpack(" str: + name = bytearray() + while True: + x = data[offset] + if x == 0: + break + name.append(x) + offset += 1 + return str(name, encoding=(encoding or "ascii")) + + def get_symbol_name(self, symbol_idx: int, true_name: bool = False) -> str: + if symbol_idx in self.idx_to_symbol_name and not true_name: + return self.idx_to_symbol_name[symbol_idx] + + name_encoded = bytes(self.symbols[symbol_idx].Name) + if name_encoded[0:4] == b"\x00\x00\x00\x00": + offset = struct.unpack(" str: + name = bytes(self.sections[section_idx].Name).rstrip(b"\x00").decode("ascii") + if name.startswith("/"): + return self.get_symbol_name(int(name[1:])) + return name + + +class CoffSection(Section): + """ + Section of the COFF object. + """ + + def __init__( + self, + name: str, + file_offset: int, + file_size: int, + virtual_addr: int, + virtual_size: int, + coff_sec: CoffSectionTableEntry, + ): + super().__init__(name, file_offset, virtual_addr, virtual_size) + self.filesize = file_size + self._coff_sec = coff_sec + + @property + def is_readable(self): + return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_READ) != 0 + + @property + def is_writable(self): + return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_WRITE) != 0 + + @property + def is_executable(self): + return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_EXECUTE) != 0 + + @property + def only_contains_uninitialized_data(self): + return (self._coff_sec.Characteristics & IMAGE_SCN.CNT_UNINITIALIZED_DATA) != 0 + + +class CoffRelocation(Relocation): + """ + Relocation for a COFF object. + """ + + def relocate(self): + value = self.value + if value is None: + log.debug("Unresolved relocation with no symbol.") + return + self.owner.memory.store(self.relative_addr, value) + + +class CoffRelocationREL32(CoffRelocation): + """ + Relocation for IMAGE_REL_*_REL32 + """ + + @property + def value(self): + org_bytes = self.owner.memory.load(self.relative_addr, 4) + org_value = struct.unpack(" None: + for sym_name, sym_idx in self._coff.symbol_name_to_idx.items(): + sym = self._coff.symbols[sym_idx] + if sym.SectionNumber > 0 and sym.StorageClass in { + IMAGE_SYM_CLASS.STATIC, + IMAGE_SYM_CLASS.LABEL, + IMAGE_SYM_CLASS.EXTERNAL, + }: + self.symbols.add(self.get_symbol(sym_name)) + + def _add_relocs(self) -> None: + for section_idx, section in enumerate(self._coff.sections): + for reloc in self._coff.relocations[section_idx]: + sym = self._coff.symbols[reloc.SymbolTableIndex] + sym_name = self._coff.get_symbol_name(reloc.SymbolTableIndex) + patch_offset = section.PointerToRawData + reloc.VirtualAddress + + if sym.StorageClass in { + IMAGE_SYM_CLASS.STATIC, + IMAGE_SYM_CLASS.LABEL, + IMAGE_SYM_CLASS.EXTERNAL, + }: + reloc_class = RELOC_CLASSES[self._coff.header.Machine].get(reloc.Type, None) + if reloc_class is not None: + cle_symbol = self.get_symbol(sym_name, produce_extern_symbols=True) + self.relocs.append(reloc_class(self, cle_symbol, patch_offset)) + continue + + log.warning("Skipped relocation type %#x at %#x for symbol %s", reloc.Type, patch_offset, sym_name) + + @staticmethod + def is_compatible(stream): + stream.seek(0) + identstring = stream.read(2) + stream.seek(0) + return int.from_bytes(identstring, "little") in (IMAGE_FILE_MACHINE.I386, IMAGE_FILE_MACHINE.AMD64) + + def get_symbol(self, name: str, produce_extern_symbols: bool = False) -> Optional[Symbol]: + if name not in self._coff.symbol_name_to_idx: + return None + + if name == "__ImageBase": + return Symbol(self, name, 0, 0, SymbolType.TYPE_OTHER) + + sym = self._coff.symbols[self._coff.symbol_name_to_idx[name]] + if sym.StorageClass in { + IMAGE_SYM_CLASS.STATIC, + IMAGE_SYM_CLASS.LABEL, + IMAGE_SYM_CLASS.EXTERNAL, + }: + symbol_type = SymbolType.TYPE_FUNCTION if sym.Type == 0x20 else SymbolType.TYPE_OTHER + if sym.SectionNumber > 0: + sym_addr = self._coff.sections[sym.SectionNumber - 1].PointerToRawData + sym.Value + return Symbol(self, name, sym_addr, 1, symbol_type) + elif sym.SectionNumber == 0: + if produce_extern_symbols: + return Symbol(self, name, 0, sym.Value, symbol_type) + return None + + raise NotImplementedError("Unsupported symbol") + + +register_backend("COFF", Coff) diff --git a/tests/test_coff.py b/tests/test_coff.py new file mode 100644 index 00000000..4aefb6e0 --- /dev/null +++ b/tests/test_coff.py @@ -0,0 +1,36 @@ +# pylint:disable=no-self-use + +import os +import unittest + +import cle + +TEST_BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.join("..", "..", "binaries")) + + +class TestCoff(unittest.TestCase): + """ + Test COFF loader. + """ + + def test_x86(self): + exe = os.path.join(TEST_BASE, "tests", "x86", "fauxware.obj") + ld = cle.Loader(exe) + symbol_names = {sym.name for sym in ld.main_object.symbols} + assert "_main" in symbol_names + assert "_accepted" in symbol_names + assert "_rejected" in symbol_names + assert "_authenticate" in symbol_names + + def test_x86_64(self): + exe = os.path.join(TEST_BASE, "tests", "x86_64", "fauxware.obj") + ld = cle.Loader(exe) + symbol_names = {sym.name for sym in ld.main_object.symbols} + assert "main" in symbol_names + assert "accepted" in symbol_names + assert "rejected" in symbol_names + assert "authenticate" in symbol_names + + +if __name__ == "__main__": + unittest.main()