From e0594079c6827b63cea39303555f0739bdb277ef Mon Sep 17 00:00:00 2001 From: gauthier Date: Wed, 4 Oct 2023 17:28:34 +0200 Subject: [PATCH] arm: add support for arm architecture and improve elf files --- .github/pyinstaller/hooks/hook-vivisect.py | 5 + CHANGELOG.md | 4 +- capa/features/common.py | 3 +- capa/features/extractors/elf.py | 2 +- capa/features/extractors/elffile.py | 2 + capa/features/extractors/viv/basicblock.py | 20 +- capa/features/extractors/viv/extractor.py | 9 +- capa/features/extractors/viv/function.py | 2 +- capa/features/extractors/viv/global_.py | 5 +- capa/features/extractors/viv/helpers.py | 17 + .../features/extractors/viv/indirect_calls.py | 58 +- capa/features/extractors/viv/insn.py | 27 +- capa/features/extractors/viv/insn_arm.py | 481 ++++++ capa/features/extractors/viv/syscall.py | 1359 +++++++++++++++++ capa/main.py | 28 +- 15 files changed, 1981 insertions(+), 41 deletions(-) create mode 100644 capa/features/extractors/viv/insn_arm.py create mode 100644 capa/features/extractors/viv/syscall.py diff --git a/.github/pyinstaller/hooks/hook-vivisect.py b/.github/pyinstaller/hooks/hook-vivisect.py index 8038b7146..d551168c5 100644 --- a/.github/pyinstaller/hooks/hook-vivisect.py +++ b/.github/pyinstaller/hooks/hook-vivisect.py @@ -40,6 +40,10 @@ "vivisect.analysis.amd64", "vivisect.analysis.amd64.emulation", "vivisect.analysis.amd64.golang", + "vivisect.analysis.arm", + "vivisect.analysis.arm.emulation", + "vivisect.analysis.arm.renaming", + "vivisect.analysis.arm.thunk_reg", "vivisect.analysis.crypto", "vivisect.analysis.crypto.constants", "vivisect.analysis.elf", @@ -76,6 +80,7 @@ "vivisect.analysis.ms.vftables", "vivisect.analysis.pe", "vivisect.impapi.posix.amd64", + "vivisect.impapi.posix.arm", "vivisect.impapi.posix.i386", "vivisect.impapi.windows", "vivisect.impapi.windows.advapi_32", diff --git a/CHANGELOG.md b/CHANGELOG.md index 2214d3233..1d12b871b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,10 @@ ### New Features - 
ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff -- binja: add support for forwarded exports #1646 @xusheng6 - binja: add support for symtab names #1504 @xusheng6 +- ARM: add support for ARM architecture +- ELF: improve ELF stripper +- ELF: improve statically linked ELF files analysis ### Breaking Changes diff --git a/capa/features/common.py b/capa/features/common.py index 9278f7e8f..6db96a88a 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -407,9 +407,10 @@ def get_value_str(self): # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types ARCH_I386 = "i386" ARCH_AMD64 = "amd64" +ARCH_ARM = "ARM" # dotnet ARCH_ANY = "any" -VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY) +VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ARM, ARCH_ANY) class Arch(Feature): diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index bad8543e8..6f6d10e23 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -978,7 +978,7 @@ def detect_elf_os(f) -> str: elif symtab_guess: ret = symtab_guess - return ret.value if ret is not None else "unknown" + return ret.value if ret is not None else "linux" def detect_elf_arch(f: BinaryIO) -> str: diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 8ed74e877..04799379d 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -122,6 +122,8 @@ def extract_file_arch(elf: ELFFile, **kwargs): yield Arch("i386"), NO_ADDRESS elif arch == "x64": yield Arch("amd64"), NO_ADDRESS + elif arch == "ARM": + yield Arch("ARM"), NO_ADDRESS else: logger.warning("unsupported architecture: %s", arch) diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 46bdb2b09..e0a36e05a 100644 --- 
a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -8,9 +8,10 @@ import string import struct -from typing import Tuple, Iterator +from typing import Tuple, Union, Iterator import envi +import envi.archs.arm.disasm import envi.archs.i386.disasm from capa.features.common import Feature, Characteristic @@ -76,7 +77,7 @@ def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Featu yield Characteristic("stack string"), bb.address -def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool: +def is_mov_imm_to_stack(instr: Union[envi.archs.i386.disasm.i386Opcode, envi.archs.arm.disasm.ArmOpcode]) -> bool: """ Return if instruction moves immediate onto stack """ @@ -92,22 +93,27 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool: if not src.isImmed(): return False - if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance( - dst, envi.archs.i386.disasm.i386RegMemOper + if ( + not isinstance(dst, envi.archs.i386.disasm.i386SibOper) + and not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper) + and not isinstance(dst, envi.archs.arm.disasm.ArmRegOper) ): return False if not dst.reg: return False - rname = dst._dis_regctx.getRegisterName(dst.reg) - if rname not in ["ebp", "rbp", "esp", "rsp"]: + if isinstance(dst, (envi.archs.i386.disasm.i386SibOper, envi.archs.i386.disasm.i386RegMemOper)): + rname = dst._dis_regctx.getRegisterName(dst.reg) + else: + rname = dst.reg + if rname not in ["ebp", "rbp", "esp", "rsp", envi.archs.arm.disasm.REG_SP, envi.archs.arm.disasm.REG_BP]: return False return True -def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int: +def get_printable_len(oper: Union[envi.archs.i386.disasm.i386ImmOper, envi.archs.arm.disasm.ArmImmOper]) -> int: """ Return string length if all operand bytes are ascii or utf16-le printable """ diff --git a/capa/features/extractors/viv/extractor.py 
b/capa/features/extractors/viv/extractor.py index faddb05d1..5c38b6126 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -17,6 +17,7 @@ import capa.features.extractors.viv.insn import capa.features.extractors.viv.global_ import capa.features.extractors.viv.function +import capa.features.extractors.viv.insn_arm import capa.features.extractors.viv.basicblock from capa.features.common import Feature from capa.features.address import Address, AbsoluteVirtualAddress @@ -26,10 +27,11 @@ class VivisectFeatureExtractor(FeatureExtractor): - def __init__(self, vw, path: Path, os): + def __init__(self, vw, path: Path, os, arm=False): super().__init__() self.vw = vw self.path = path + self.arm = arm self.buf = path.read_bytes() # pre-compute these because we'll yield them at *every* scope. @@ -74,7 +76,10 @@ def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHa def extract_insn_features( self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle ) -> Iterator[Tuple[Feature, Address]]: - yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih) + if self.arm: + yield from capa.features.extractors.viv.insn_arm.extract_features(fh, bbh, ih) + else: + yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih) def is_library_function(self, addr): return viv_utils.flirt.is_library_function(self.vw, addr) diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index b018b34bb..a1b3c5f05 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -80,7 +80,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Ad bflags & envi.BR_COND or bflags & envi.BR_FALL or bflags & envi.BR_TABLE - or bb.instructions[-1].mnem == "jmp" + or bb.instructions[-1].mnem in ["jmp", "b", "bx"] ): edges.append((bb.va, bva)) diff --git a/capa/features/extractors/viv/global_.py 
b/capa/features/extractors/viv/global_.py index 1b2042373..0cbe91f69 100644 --- a/capa/features/extractors/viv/global_.py +++ b/capa/features/extractors/viv/global_.py @@ -8,7 +8,7 @@ import logging from typing import Tuple, Iterator -from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature +from capa.features.common import ARCH_ARM, ARCH_I386, ARCH_AMD64, Arch, Feature from capa.features.address import NO_ADDRESS, Address logger = logging.getLogger(__name__) @@ -22,6 +22,9 @@ def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]: elif arch == "i386": yield Arch(ARCH_I386), NO_ADDRESS + elif arch == "ARM": + yield Arch(ARCH_ARM), NO_ADDRESS + else: # we likely end up here: # 1. handling a new architecture (e.g. aarch64) diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index 662704afa..1786a532f 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -7,6 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Optional +import envi from vivisect import VivWorkspace from vivisect.const import XR_TO, REF_CODE @@ -21,3 +22,19 @@ def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]: return xrefs[0][XR_TO] else: return None + + +def read_memory(vw, va: int, size: int) -> bytes: + # as documented in #176, vivisect will not readMemory() when the section is not marked readable. + # + # but here, we don't care about permissions. + # so, copy the viv implementation of readMemory and remove the permissions check. 
+ # + # this is derived from: + # https://github.com/vivisect/vivisect/blob/5eb4d237bddd4069449a6bc094d332ceed6f9a96/envi/memory.py#L453-L462 + for mva, mmaxva, mmap, mbytes in vw._map_defs: + if va >= mva and va < mmaxva: + mva, msize, mperms, mfname = mmap + offset = va - mva + return mbytes[offset : offset + size] + raise envi.exc.SegmentationViolation(va) diff --git a/capa/features/extractors/viv/indirect_calls.py b/capa/features/extractors/viv/indirect_calls.py index 71c63172f..4c7fe41ef 100644 --- a/capa/features/extractors/viv/indirect_calls.py +++ b/capa/features/extractors/viv/indirect_calls.py @@ -11,6 +11,7 @@ import envi import vivisect.const +import envi.archs.arm.disasm import envi.archs.i386.disasm import envi.archs.amd64.disasm from vivisect import VivWorkspace @@ -20,12 +21,15 @@ i386ImmOper = envi.archs.i386.disasm.i386ImmOper i386ImmMemOper = envi.archs.i386.disasm.i386ImmMemOper Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper +ARMRegOper = envi.archs.arm.disasm.ArmRegOper +ARMImmOper = envi.archs.arm.disasm.ArmImmOper +ARMScaledOffsetOper = envi.archs.arm.disasm.ArmScaledOffsetOper LOC_OP = vivisect.const.LOC_OP IF_NOFALL = envi.IF_NOFALL REF_CODE = vivisect.const.REF_CODE FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH -DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor") +DESTRUCTIVE_MNEMONICS = ("mov", "lea", "ldr", "pop", "xor", "eor") def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]: @@ -71,6 +75,38 @@ class NotFoundError(Exception): pass +def find_value(vw: VivWorkspace, va: int, reg: int, q): + tmp = 0 + seen = set([]) # type: Set[int] + + q.extend(get_previous_instructions(vw, va)) + while q: + cur = q.popleft() + if cur in seen: + continue + seen.add(cur) + insn = vw.parseOpcode(cur) + if len(insn.opers) == 0: + q.extend(get_previous_instructions(vw, cur)) + continue + + opnd0 = insn.opers[0] + if not (isinstance(opnd0, ARMRegOper) and opnd0.reg == reg): + 
q.extend(get_previous_instructions(vw, cur)) + continue + if insn.mnem == "sub" and isinstance(insn.opers[1], ARMImmOper): + tmp -= insn.opers[1].val + q.extend(get_previous_instructions(vw, cur)) + continue + if insn.mnem == "add" and isinstance(insn.opers[1], ARMImmOper): + tmp += insn.opers[1].val + q.extend(get_previous_instructions(vw, cur)) + continue + if insn.mnem == "mov" and isinstance(insn.opers[1], ARMImmOper): + return insn.opers[1].val + tmp + return None + + def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]: """ scan backwards from the given address looking for assignments to the given register. @@ -106,7 +142,9 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[ continue opnd0 = insn.opers[0] - if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS): + if not ( + isinstance(opnd0, (i386RegOper, ARMRegOper)) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS + ): q.extend(get_previous_instructions(vw, cur)) continue @@ -115,16 +153,24 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[ # we currently only support extracting the constant from something like: `mov $reg, IAT` # so, any other pattern results in an unknown value, represented by None. # this is a good place to extend in the future, if we need more robust support. 
- if insn.mnem != "mov": + if insn.mnem not in ("mov", "ldr"): return (cur, None) else: opnd1 = insn.opers[1] - if isinstance(opnd1, i386ImmOper): + if isinstance(opnd1, (i386ImmOper, ARMImmOper)): return (cur, opnd1.getOperValue(opnd1)) elif isinstance(opnd1, i386ImmMemOper): return (cur, opnd1.getOperAddr(opnd1)) elif isinstance(opnd1, Amd64RipRelOper): return (cur, opnd1.getOperAddr(insn)) + elif isinstance(opnd1, ARMScaledOffsetOper): + base_reg = find_value(vw, cur, opnd1.base_reg, q) + if base_reg is None: + return (cur, None) + offset_reg = find_value(vw, cur, opnd1.offset_reg, q) + if offset_reg is None: + return (cur, None) + return (cur, base_reg + offset_reg) else: # might be something like: `mov $reg, dword_401000[eax]` return (cur, None) @@ -136,7 +182,9 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool: if insn is None: insn = vw.parseOpcode(va) - return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper) + return insn.mnem in ("call", "jmp", "bl", "blx", "b", "bx") and isinstance( + insn.opers[0], (envi.archs.i386.disasm.i386RegOper, envi.archs.arm.disasm.ArmRegOper) + ) def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]: diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7b88dd2de..10ad7f25d 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -23,6 +23,8 @@ from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors.elf import SymTab +from capa.features.extractors.viv.helpers import read_memory +from capa.features.extractors.viv.syscall import get_library_function_name from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle from 
capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call @@ -81,6 +83,15 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if f.vw.getFunctionMeta(f.va, "Thunk"): return + # Added a case for catching basic blocks that contain direct calls to system functions. + if insn.mnem in ("int", "syscall"): + if insn.mnem != "int" or insn.opers[0].imm == 128: + name = get_library_function_name(f.vw, bb) + if name is None: + return + yield API(name), ih.address + return + # traditional call via IAT if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper): oper = insn.opers[0] @@ -222,22 +233,6 @@ def derefs(vw, p): p = next -def read_memory(vw, va: int, size: int) -> bytes: - # as documented in #176, vivisect will not readMemory() when the section is not marked readable. - # - # but here, we don't care about permissions. - # so, copy the viv implementation of readMemory and remove the permissions check. - # - # this is derived from: - # https://github.com/vivisect/vivisect/blob/5eb4d237bddd4069449a6bc094d332ceed6f9a96/envi/memory.py#L453-L462 - for mva, mmaxva, mmap, mbytes in vw._map_defs: - if va >= mva and va < mmaxva: - mva, msize, mperms, mfname = mmap - offset = va - mva - return mbytes[offset : offset + size] - raise envi.exc.SegmentationViolation(va) - - def read_bytes(vw, va: int) -> bytes: """ read up to MAX_BYTES_FEATURE_SIZE from the given address. diff --git a/capa/features/extractors/viv/insn_arm.py b/capa/features/extractors/viv/insn_arm.py new file mode 100644 index 000000000..64662edf7 --- /dev/null +++ b/capa/features/extractors/viv/insn_arm.py @@ -0,0 +1,481 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import logging +from typing import List, Tuple, Callable, Iterator + +import envi +import envi.exc +import viv_utils +import envi.memory +import viv_utils.flirt +import envi.archs.i386.regs +import envi.archs.amd64.regs +import envi.archs.i386.disasm +import envi.archs.amd64.disasm + +import capa.features.extractors.helpers +import capa.features.extractors.viv.helpers +from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset +from capa.features.common import Bytes, String, Feature, Characteristic +from capa.features.address import Address, AbsoluteVirtualAddress +from capa.features.extractors.viv.insn import ( + derefs, + read_bytes, + get_imports, + get_section, + read_string, + is_security_cookie, +) +from capa.features.extractors.viv.helpers import read_memory +from capa.features.extractors.viv.syscall import get_library_function_name_arm +from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle +from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call, get_previous_instructions + +# security cookie checks may perform non-zeroing XORs, these are expected within a certain +# byte range within the first and returning basic blocks, this helps to reduce FP features +SECURITY_COOKIE_BYTES_DELTA = 0x40 + + +logger = logging.getLogger("capa") + + +def interface_extract_instruction_XXX( + fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle +) -> Iterator[Tuple[Feature, Address]]: + """ + parse features from the given instruction. 
+ + args: + fh: the function handle to process. + bbh: the basic block handle to process. + ih: the instruction handle to process. + + yields: + (Feature, Address): the feature and the address at which its found. + """ + raise NotImplementedError + + +def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """ + parse API features from the given instruction. + """ + + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + if insn.mnem not in ("blx", "bx", "bl", "b", "svc"): + return + + if isinstance(insn.opers[0], envi.archs.arm.disasm.ArmPcOffsetOper): + imports = get_imports(f.vw) + target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va) + if not target: + return + + if viv_utils.flirt.is_library_function(f.vw, target): + name = viv_utils.get_function_name(f.vw, target) + yield API(name), ih.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield API(name[1:]), ih.address + return + + if read_memory(f.vw, target, 4) != b"\x00\xc6\x8f\xe2": + return + api = f.vw.parseOpcode(target).opers[-1].getOperValue(insn) + op = f.vw.parseOpcode(target + 4).opers[-1] + api += envi.archs.arm.disasm.shifters[op.shtype](op.val, op.shval, op.size, emu=None) + api += f.vw.parseOpcode(target + 8).opers[-1].offset + + if api not in imports: + return + dll, symbol = imports[api] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), ih.address + + # Added a case for catching basic blocks that contain direct calls to system functions. 
+ elif isinstance(insn.opers[0], envi.archs.arm.disasm.ArmImmOper): + if insn.mnem != "svc": + return + + name = get_library_function_name_arm(f.vw, bb) + if not name: + return + yield API(name), ih.address + + elif isinstance(insn.opers[0], envi.archs.arm.disasm.ArmRegOper): + # arm ret: bx lr + if insn.mnem == "bx" and insn.opers[0].reg == 14: + return # just return befor the blx + try: + (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn) + except NotFoundError as e: + logger.warning("Not able to resolve the indirect call : %s", str(e)) + return + + if target is None: + return + + imports = get_imports(f.vw) + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), ih.address + + else: + pass + + +def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """ + parse byte sequence features from the given instruction. + example: + # push offset iid_004118d4_IShellLinkA ; riid + """ + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + if insn.mnem == "bl" or insn.mnem == "blx": + return + + for oper in insn.opers: + if isinstance(oper, envi.archs.arm.disasm.ArmImmOper): + v = oper.getOperValue(oper) + elif isinstance(oper, envi.archs.arm.disasm.ArmRegOper): + continue + elif isinstance(oper, envi.archs.arm.disasm.ArmRegShiftImmOper) and oper.reg == 0xF: # REG_PC + v = oper.getOperValue(oper) + + else: + continue + + for v in derefs(f.vw, v): + try: + buf = read_bytes(f.vw, v) + except envi.exc.SegmentationViolation: + continue + + if capa.features.extractors.helpers.all_zeros(buf): + continue + + yield Bytes(buf), ih.address + + +def extract_insn_nzxor_characteristic_features( + fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle +) -> Iterator[Tuple[Feature, Address]]: + """ + parse non-zeroing XOR instruction from the given instruction. + ignore expected non-zeroing XORs, e.g. 
security cookies. + """ + insn: envi.Opcode = ih.inner + bb: viv_utils.BasicBlock = bbhandle.inner + f: viv_utils.Function = fh.inner + + if insn.mnem not in ("eor", "eors", "veor"): + return + + if insn.opers[0] == insn.opers[1]: + return + + if is_security_cookie(f, bb, insn): + return + + yield Characteristic("nzxor"), ih.address + + +def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """parse mnemonic features from the given instruction.""" + yield Mnemonic(ih.inner.mnem), ih.address + + +def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """ + parse call $+5 instruction from the given instruction. + """ + insn: envi.Opcode = ih.inner + + if insn.mnem not in ("blx", "bl"): # call + return + + if isinstance(insn.opers[0], envi.archs.arm.disasm.ArmPcOffsetOper): + if insn.va + 4 == insn.opers[0].getOperValue(insn): + yield Characteristic("call $+5"), ih.address + + if isinstance(insn.opers[0], envi.archs.arm.disasm.ArmImmOper): + if insn.va + 4 == insn.opers[0].getOperAddr(insn): + yield Characteristic("call $+5"), ih.address + + +def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """ + inspect the instruction for a CALL or JMP that crosses section boundaries. + """ + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + for va, flags in insn.getBranches(): + if va is None: + # va may be none for dynamic branches that haven't been resolved, such as `jmp eax`. 
+ continue + + if flags & envi.BR_FALL: + continue + + try: + if insn.mnem in ("b", "bl", "bx", "blx") and isinstance( + insn.opers[0], envi.archs.arm.disasm.ArmImmOffsetOper + ): + oper = insn.opers[0] + target = oper.getOperAddr(insn) + + if target in get_imports(f.vw): + continue + + if get_section(f.vw, insn.va) != get_section(f.vw, va): + yield Characteristic("cross section flow"), ih.address + + except KeyError: + continue + + +# this is a feature that's most relevant at the function scope, +# however, its most efficient to extract at the instruction scope. +def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + if insn.mnem != "blx" and insn.mnem != "bl": + return + + target = None + + if isinstance(insn.opers[0], envi.archs.arm.disasm.ArmImmOffsetOper): + oper = insn.opers[0] + target = oper.getOperAddr(insn) + yield Characteristic("calls from"), AbsoluteVirtualAddress(target) + + elif isinstance(insn.opers[0], envi.archs.arm.disasm.ArmPcOffsetOper): + target = insn.opers[0].getOperValue(insn) + if target >= 0: + yield Characteristic("calls from"), AbsoluteVirtualAddress(target) + + if target and target == f.va: + # if we found a jump target and it's the function address + # mark as recursive + yield Characteristic("recursive call"), AbsoluteVirtualAddress(target) + + +# this is a feature that's most relevant at the function or basic block scope, +# however, its most efficient to extract at the instruction scope. 
+def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + """ + extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4]) + does not include calls like => call ds:dword_ABD4974 + """ + insn: envi.Opcode = ih.inner + + if insn.mnem not in ("bl", "blx"): + return + + if isinstance(insn.opers[0], envi.archs.arm.disasm.ArmRegOper): + yield Characteristic("indirect call"), ih.address + elif isinstance(insn.opers[0], envi.archs.arm.disasm.ArmRegOffsetOper): + yield Characteristic("indirect call"), ih.address + elif isinstance(insn.opers[0], envi.archs.arm.disasm.ArmRegScalarOper): + yield Characteristic("indirect call"), ih.address + + +def extract_op_number_features( + fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand +) -> Iterator[Tuple[Feature, Address]]: + """parse number features from the given operand.""" + + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + # not sure for ImmOffsetOper + if not isinstance(oper, (envi.archs.arm.disasm.ArmImmOper, envi.archs.arm.disasm.ArmImmOffsetOper)): + return + + v = oper.getOperValue(oper) + + if f.vw.probeMemory(v, 1, envi.memory.MM_READ): + # this is a valid address + # assume its not also a constant. + return + + if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.arm.regs.REG_SP: + # skip things like: + # + # .text:00401140 call sub_407E2B + # .text:00401145 add esp, 0Ch + return + + yield Number(v), ih.address + yield OperandNumber(i, v), ih.address + + if insn.mnem == "add" and 0 < v < MAX_STRUCTURE_SIZE and isinstance(oper, envi.archs.arm.disasm.ArmImmOper): + # for pattern like: + # + # add eax, 0x10 + # + # assume 0x10 is also an offset (imagine eax is a pointer). 
+ yield Offset(v), ih.address + yield OperandOffset(i, v), ih.address + + +def extract_op_offset_features( + fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand +) -> Iterator[Tuple[Feature, Address]]: + """parse structure offset features from the given operand.""" + # example: + # + # .text:0040112F cmp [esi+4], ebx + insn: envi.Opcode = ih.inner + f: viv_utils.Function = fh.inner + + # this is for both x32 and x64 + # like [esi + 4] + # reg ^ + # disp + if isinstance(oper, envi.archs.arm.disasm.ArmRegOffsetOper): + if oper.base_reg == envi.archs.arm.regs.REG_SP: + return + + if oper.base_reg == envi.archs.arm.regs.REG_BP: + return + + v = oper.offset_reg + + yield Offset(v), ih.address + yield OperandOffset(i, v), ih.address + + if insn.mnem == "ldr" and i == 1 and not f.vw.probeMemory(v, 1, envi.memory.MM_READ): + yield Number(v), ih.address + yield OperandNumber(i, v), ih.address + + # like: [esi + ecx + 16384] + # reg ^ ^ + # index ^ + # disp + elif isinstance(oper, envi.archs.arm.disasm.ArmRegShiftImmOper): + v = oper.shimm + + yield Offset(v), ih.address + yield OperandOffset(i, v), ih.address + + +def extract_op_string_features( + fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand +) -> Iterator[Tuple[Feature, Address]]: + """parse string features from the given operand.""" + # example: + # + # push offset aAcr ; "ACR > " + f: viv_utils.Function = fh.inner + + if isinstance(oper, envi.archs.arm.disasm.ArmImmOper): + v = oper.getOperValue(oper) + elif isinstance(oper, envi.archs.arm.disasm.ArmImmOffsetOper): + v = oper.getOperAddr(oper) + elif isinstance(oper, envi.archs.arm.disasm.ArmRegShiftImmOper): + v = oper.shimm + else: + return + + for v in derefs(f.vw, v): + try: + s = read_string(f.vw, v).rstrip("\x00") + except ValueError: + continue + else: + if len(s) >= 4: + yield String(s), ih.address + + +def extract_op_string_arm(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle): + insn = ih.inner + if not ( + insn.mnem == "add" + and 
len(insn.opers) == 2 + and isinstance(insn.opers[0], envi.archs.arm.disasm.ArmRegOper) + and isinstance(insn.opers[1], envi.archs.arm.disasm.ArmRegOper) + and insn.opers[1].reg == envi.archs.arm.regs.REG_PC + ): + return + + f = fh.inner + target = insn.opers[0].reg + prev_inst = get_previous_instructions(f.vw, ih.address) + while 1: + i = f.vw.parseOpcode(prev_inst[0]) + if len(i.opers) == 2: + if isinstance(i.opers[0], envi.archs.arm.disasm.ArmRegOper) and i.opers[0].reg == target: + if i.mnem == "ldr": + addr = i.opers[1].getOperAddr(i.opers[1]) + off = int.from_bytes(read_memory(f.vw, addr, 4), "big" if f.vw.bigend else "little") + res = ih.address + 4 + off # may not be always 4, (next next inst) + try: + s = read_string(f.vw, res) + yield String(s.rstrip("\x00")), ih.address + except Exception: + return + return + prev_inst = get_previous_instructions(f.vw, prev_inst[0]) + + +def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: + for i, oper in enumerate(insn.inner.opers): + for op_handler in OPERAND_HANDLERS: + for feature, addr in op_handler(f, bb, insn, i, oper): + yield feature, addr + + +OPERAND_HANDLERS: List[ + Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]] +] = [ + extract_op_number_features, + extract_op_offset_features, + extract_op_string_features, +] + + +def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]: + """ + extract features from the given insn. + + args: + f (viv_utils.Function): the function from which to extract features + bb (viv_utils.BasicBlock): the basic block to process. + insn (vivisect...Instruction): the instruction to process. + + yields: + Tuple[Feature, Address]: the features and their location found in this insn. 
+ """ + for insn_handler in INSTRUCTION_HANDLERS: + for feature, addr in insn_handler(f, bb, insn): + yield feature, addr + + +INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [ + extract_insn_api_features, + extract_insn_bytes_features, + extract_insn_nzxor_characteristic_features, + extract_insn_mnemonic_features, + extract_insn_obfs_call_plus_5_characteristic_features, + extract_insn_cross_section_cflow, + extract_function_calls_from, + extract_function_indirect_call_characteristic_features, + extract_op_string_arm, + extract_operand_features, +] diff --git a/capa/features/extractors/viv/syscall.py b/capa/features/extractors/viv/syscall.py new file mode 100644 index 000000000..f1e96eb37 --- /dev/null +++ b/capa/features/extractors/viv/syscall.py @@ -0,0 +1,1359 @@ +from typing import Callable + +import viv_utils +import viv_utils.flirt +import envi.archs.arm.disasm +import envi.archs.i386.disasm +import envi.archs.amd64.disasm + +from capa.features.common import ARCH_ARM, ARCH_I386, ARCH_AMD64 +from capa.features.address import AbsoluteVirtualAddress +from capa.features.extractors.viv.helpers import read_memory +from capa.features.extractors.base_extractor import BBHandle, FeatureExtractor + + +def getSyscallName(num: int, arch: bool) -> str: + if arch: + return Sys_x86[num] + return Sys_x64[num] + + +def get_library_function_name(vw, bb: BBHandle) -> str: + """Function objective is to find the name of the syscall. 
def get_library_function_name_arm(vw, bb: BBHandle):
    """
    resolve the name of the syscall issued by the `svc` instruction of a basic block.

    when the `svc` immediate is zero, the syscall number is taken from the closest
    preceding `mov`/`ldr` that writes register 7 (an immediate, or a 4-byte value
    loaded from memory). otherwise the low 16 bits of the `svc` immediate are
    used as the syscall number.

    args:
      vw: the vivisect workspace; used to read memory and to learn the endianness.
      bb: handle of the basic block that contains the `svc` instruction.

    returns:
      the name found in `Sys_arm`, or None when the block has no `svc`, the syscall
      number cannot be recovered, or the number is not in the table.
    """
    syscall_reg = 7  # register index compared against the destination operand (r7)

    insns = bb.inner.instructions
    try:
        start = [i.mnem for i in insns].index("svc")
    except ValueError:
        # no svc in this block: nothing to name
        return None

    svc_imm = insns[start].opers[0].val
    if svc_imm != 0:
        # NOTE(review): presumably the legacy OABI form where the number is encoded
        # in the svc immediate - confirm.
        num = svc_imm & 0xFFFF
    else:
        num = -1
        # only look at the instructions *before* the svc, closest first.
        # slicing with `[start - 1 :: -1]` would wrap around to the end of the
        # block when the svc is the first instruction (start == 0).
        for i in reversed(insns[:start]):
            # a destination and a source operand are required;
            # this skips operand-less or single-operand instructions.
            if len(i.opers) < 2:
                continue
            if i.mnem not in ("ldr", "mov"):
                continue

            dst, src = i.opers[0], i.opers[1]
            if not (isinstance(dst, envi.archs.arm.disasm.ArmRegOper) and dst.reg == syscall_reg):
                continue

            if isinstance(src, envi.archs.arm.disasm.ArmImmOper):  # mov
                num = src.val
                break

            if isinstance(src, envi.archs.arm.disasm.ArmImmOffsetOper):  # ldr
                # pc points two instructions ahead of the one being executed.
                # NOTE(review): +8 assumes 4-byte (ARM mode) instructions and a
                # pc-relative load - confirm for Thumb code.
                addr = (i.va + 8) + src.offset
                try:
                    num = int.from_bytes(read_memory(vw, addr, 4), "big" if vw.bigend else "little")
                except Exception:
                    # best effort: the address may not be mapped in the workspace
                    return None
                break

        if num == -1:
            return None

    return Sys_arm.get(num)
def is_static(buf: bytes) -> bool:
    """
    guess whether an ELF image is statically linked.

    the section header string table is located through the ELF header
    (e_shoff, e_shentsize, e_shstrndx) and the image is considered static
    when no section is named ".dynamic".

    args:
      buf: the raw bytes of the ELF file.

    returns:
      True when ".dynamic" is not among the section names, False otherwise.

    raises:
      IndexError: when `buf` is shorter than 6 bytes.

    note: the ELF magic is not validated, and a file without a section header
    table is not handled specially.
    """
    byteorder = "little" if buf[0x05] == 1 else "big"  # EI_DATA
    ei_class = buf[0x04]  # EI_CLASS: 1 means 32-bit, anything else is parsed as 64-bit

    def field(offset: int, size: int) -> int:
        # decode one unsigned header field using the file's byte order
        return int.from_bytes(buf[offset : offset + size], byteorder)

    if ei_class == 1:
        # 32-bit format
        e_shoff = field(0x20, 4)
        e_shentsize = field(0x2E, 2)
        e_shstr = e_shoff + e_shentsize * field(0x32, 2)
        str_sh_offset = field(e_shstr + 0x10, 4)
        # sh_size is 4 bytes wide; reading up to +0x20 would also swallow
        # sh_link and sh_info and yield a bogus size.
        str_sh_size = field(e_shstr + 0x14, 4)
    else:
        # 64-bit format
        e_shoff = field(0x28, 8)
        e_shentsize = field(0x3A, 2)
        e_shstr = e_shoff + e_shentsize * field(0x3E, 2)
        str_sh_offset = field(e_shstr + 0x18, 8)
        str_sh_size = field(e_shstr + 0x20, 8)

    section_names = buf[str_sh_offset : str_sh_offset + str_sh_size].split(b"\x00")
    return b".dynamic" not in section_names
add_library_function(extractor.vw, bb, get_library_function_name) + + elif arch == ARCH_ARM: + for function in functions: + for bb in extractor.get_basic_blocks(function): + insn = list(extractor.get_instructions(function, bb)) + mnem_list = [i.inner.mnem for i in insn] + if mnem_list.count("svc") == 1: # bx lr + add_library_function(extractor.vw, bb, get_library_function_name_arm) + + +Sys_x64 = { + 0: "read", + 1: "write", + 2: "open", + 3: "close", + 4: "stat", + 5: "fstat", + 6: "lstat", + 7: "poll", + 8: "lseek", + 9: "mmap", + 10: "mprotect", + 11: "munmap", + 12: "brk", + 13: "rt_sigaction", + 14: "rt_sigprocmask", + 15: "rt_sigreturn", + 16: "ioctl", + 17: "pread64", + 18: "pwrite64", + 19: "readv", + 20: "writev", + 21: "access", + 22: "pipe", + 23: "select", + 24: "sched_yield", + 25: "mremap", + 26: "msync", + 27: "mincore", + 28: "madvise", + 29: "shmget", + 30: "shmat", + 31: "shmctl", + 32: "dup", + 33: "dup2", + 34: "pause", + 35: "nanosleep", + 36: "getitimer", + 37: "alarm", + 38: "setitimer", + 39: "getpid", + 40: "sendfile", + 41: "socket", + 42: "connect", + 43: "accept", + 44: "sendto", + 45: "recvfrom", + 46: "sendmsg", + 47: "recvmsg", + 48: "shutdown", + 49: "bind", + 50: "listen", + 51: "getsockname", + 52: "getpeername", + 53: "socketpair", + 54: "setsockopt", + 55: "getsockopt", + 56: "clone", + 57: "fork", + 58: "vfork", + 59: "execve", + 60: "exit", + 61: "wait4", + 62: "kill", + 63: "uname", + 64: "semget", + 65: "semop", + 66: "semctl", + 67: "shmdt", + 68: "msgget", + 69: "msgsnd", + 70: "msgrcv", + 71: "msgctl", + 72: "fcntl", + 73: "flock", + 74: "fsync", + 75: "fdatasync", + 76: "truncate", + 77: "ftruncate", + 78: "getdents", + 79: "getcwd", + 80: "chdir", + 81: "fchdir", + 82: "rename", + 83: "mkdir", + 84: "rmdir", + 85: "creat", + 86: "link", + 87: "unlink", + 88: "symlink", + 89: "readlink", + 90: "chmod", + 91: "fchmod", + 92: "chown", + 93: "fchown", + 94: "lchown", + 95: "umask", + 96: "gettimeofday", + 97: "getrlimit", + 
98: "getrusage", + 99: "sysinfo", + 100: "times", + 101: "ptrace", + 102: "getuid", + 103: "syslog", + 104: "getgid", + 105: "setuid", + 106: "setgid", + 107: "geteuid", + 108: "getegid", + 109: "setpgid", + 110: "getppid", + 111: "getpgrp", + 112: "setsid", + 113: "setreuid", + 114: "setregid", + 115: "getgroups", + 116: "setgroups", + 117: "setresuid", + 118: "getresuid", + 119: "setresgid", + 120: "getresgid", + 121: "getpgid", + 122: "setfsuid", + 123: "setfsgid", + 124: "getsid", + 125: "capget", + 126: "capset", + 127: "rt_sigpending", + 128: "rt_sigtimedwait", + 129: "rt_sigqueueinfo", + 130: "rt_sigsuspend", + 131: "sigaltstack", + 132: "utime", + 133: "mknod", + 134: "uselib", + 135: "personality", + 136: "ustat", + 137: "statfs", + 138: "fstatfs", + 139: "sysfs", + 140: "getpriority", + 141: "setpriority", + 142: "sched_setparam", + 143: "sched_getparam", + 144: "sched_setscheduler", + 145: "sched_getscheduler", + 146: "sched_get_priority_max", + 147: "sched_get_priority_min", + 148: "sched_rr_get_interval", + 149: "mlock", + 150: "munlock", + 151: "mlockall", + 152: "munlockall", + 153: "vhangup", + 154: "modify_ldt", + 155: "pivot_root", + 156: "_sysctl", + 157: "prctl", + 158: "arch_prctl", + 159: "adjtimex", + 160: "setrlimit", + 161: "chroot", + 162: "sync", + 163: "acct", + 164: "settimeofday", + 165: "mount", + 166: "umount2", + 167: "swapon", + 168: "swapoff", + 169: "reboot", + 170: "sethostname", + 171: "setdomainname", + 172: "iopl", + 173: "ioperm", + 174: "create_module", + 175: "init_module", + 176: "delete_module", + 177: "get_kernel_syms", + 178: "query_module", + 179: "quotactl", + 180: "nfsservctl", + 181: "getpmsg", + 186: "gettid", + 187: "readahead", + 188: "setxattr", + 189: "lsetxattr", + 190: "fsetxattr", + 191: "getxattr", + 192: "lgetxattr", + 193: "fgetxattr", + 194: "listxattr", + 195: "llistxattr", + 196: "flistxattr", + 197: "removexattr", + 198: "lremovexattr", + 199: "fremovexattr", + 200: "tkill", + 201: "time", + 202: 
"futex", + 203: "sched_setaffinity", + 204: "sched_getaffinity", + 205: "set_thread_area", + 206: "io_setup", + 207: "io_destroy", + 208: "io_getevents", + 209: "io_submit", + 210: "io_cancel", + 211: "get_thread_area", + 212: "lookup_dcookie", + 213: "epoll_create", + 214: "epoll_ctl_old", + 215: "epoll_wait_old", + 216: "remap_file_pages", + 217: "getdents64", + 218: "set_tid_address", + 219: "restart_syscall", + 220: "semtimedop", + 221: "fadvise64", + 222: "timer_create", + 223: "timer_settime", + 224: "timer_gettime", + 225: "timer_getoverrun", + 226: "timer_delete", + 227: "clock_settime", + 228: "clock_gettime", + 229: "clock_getres", + 230: "clock_nanosleep", + 231: "exit_group", + 232: "epoll_wait", + 233: "epoll_ctl", + 234: "tgkill", + 235: "utimes", + 237: "mbind", + 238: "set_mempolicy", + 239: "get_mempolicy", + 240: "mq_open", + 241: "mq_unlink", + 242: "mq_timedsend", + 243: "mq_timedreceive", + 244: "mq_notify", + 245: "mq_getsetattr", + 246: "kexec_load", + 247: "waitid", + 248: "add_key", + 249: "request_key", + 250: "keyctl", + 251: "ioprio_set", + 252: "ioprio_get", + 253: "inotify_init", + 254: "inotify_add_watch", + 255: "inotify_rm_watch", + 256: "migrate_pages", + 257: "openat", + 258: "mkdirat", + 259: "mknodat", + 260: "fchownat", + 261: "futimesat", + 262: "newfstatat", + 263: "unlinkat", + 264: "renameat", + 265: "linkat", + 266: "symlinkat", + 267: "readlinkat", + 268: "fchmodat", + 269: "faccessat", + 270: "pselect6", + 271: "ppoll", + 272: "unshare", + 273: "set_robust_list", + 274: "get_robust_list", + 275: "splice", + 276: "tee", + 277: "sync_file_range", + 278: "vmsplice", + 279: "move_pages", + 280: "utimensat", + 281: "epoll_pwait", + 282: "signalfd", + 283: "timerfd_create", + 284: "eventfd", + 285: "fallocate", + 286: "timerfd_settime", + 287: "timerfd_gettime", + 288: "accept4", + 289: "signalfd4", + 290: "eventfd2", + 291: "epoll_create1", + 292: "dup3", + 293: "pipe2", + 294: "inotify_init1", + 295: "preadv", + 296: 
"pwritev", + 297: "rt_tgsigqueueinfo", + 298: "perf_event_open", + 299: "recvmmsg", + 300: "fanotify_init", + 301: "fanotify_mark", + 302: "prlimit64", + 303: "name_to_handle_at", + 304: "open_by_handle_at", + 305: "clock_adjtime", + 306: "syncfs", + 307: "sendmmsg", + 308: "setns", + 309: "getcpu", + 310: "process_vm_readv", + 311: "process_vm_writev", + 312: "kcmp", + 313: "finit_module", + 314: "sched_setattr", + 315: "sched_getattr", + 316: "renameat2", + 317: "seccomp", + 318: "getrandom", + 319: "memfd_create", + 320: "kexec_file_load", + 321: "bpf", + 322: "execveat", + 323: "userfaultfd", + 324: "membarrier", + 325: "mlock2", + 326: "copy_file_range", + 327: "preadv2", + 328: "pwritev2", + 329: "pkey_mprotect", + 330: "pkey_alloc", + 331: "pkey_free", + 332: "statx", + 333: "io_pgetevents", + 334: "rseq", + 424: "pidfd_send_signal", + 425: "io_uring_setup", + 426: "io_uring_enter", + 427: "io_uring_register", + 428: "open_tree", + 429: "move_mount", + 430: "fsopen", + 431: "fsconfig", + 432: "fsmount", + 433: "fspick", + 434: "pidfd_open", + 435: "clone3", + 436: "close_range", + 437: "openat2", + 438: "pidfd_getfd", + 439: "faccessat2", + 440: "process_madvise", + 441: "epoll_pwait2", + 442: "mount_setattr", + 443: "quotactl_fd", + 444: "landlock_create_ruleset", + 445: "landlock_add_rule", + 446: "landlock_restrict_self", + 447: "memfd_secret", + 448: "process_mrelease", + 449: "futex_waitv", + 450: "set_mempolicy_home_node", +} + + +Sys_x86 = { + 0: "restart_syscall", + 1: "exit", + 2: "fork", + 3: "read", + 4: "write", + 5: "open", + 6: "close", + 7: "waitpid", + 8: "creat", + 9: "link", + 10: "unlink", + 11: "execve", + 12: "chdir", + 13: "time", + 14: "mknod", + 15: "chmod", + 16: "lchown", + 18: "oldstat", + 19: "lseek", + 20: "getpid", + 21: "mount", + 22: "umount", + 23: "setuid", + 24: "getuid", + 25: "stime", + 26: "ptrace", + 27: "alarm", + 28: "oldfstat", + 29: "pause", + 30: "utime", + 33: "access", + 34: "nice", + 36: "sync", + 37: "kill", + 
38: "rename", + 39: "mkdir", + 40: "rmdir", + 41: "dup", + 42: "pipe", + 43: "times", + 45: "brk", + 46: "setgid", + 47: "getgid", + 48: "signal", + 49: "geteuid", + 50: "getegid", + 51: "acct", + 52: "umount2", + 54: "ioctl", + 55: "fcntl", + 57: "setpgid", + 59: "oldolduname", + 60: "umask", + 61: "chroot", + 62: "ustat", + 63: "dup2", + 64: "getppid", + 65: "getpgrp", + 66: "setsid", + 67: "sigaction", + 68: "sgetmask", + 69: "ssetmask", + 70: "setreuid", + 71: "setregid", + 72: "sigsuspend", + 73: "sigpending", + 74: "sethostname", + 75: "setrlimit", + 76: "getrlimit", + 77: "getrusage", + 78: "gettimeofday", + 79: "settimeofday", + 80: "getgroups", + 81: "setgroups", + 82: "select", + 83: "symlink", + 84: "oldlstat", + 85: "readlink", + 86: "uselib", + 87: "swapon", + 88: "reboot", + 89: "readdir", + 90: "mmap", + 91: "munmap", + 92: "truncate", + 93: "ftruncate", + 94: "fchmod", + 95: "fchown", + 96: "getpriority", + 97: "setpriority", + 99: "statfs", + 100: "fstatfs", + 101: "ioperm", + 102: "socketcall", + 103: "syslog", + 104: "setitimer", + 105: "getitimer", + 106: "stat", + 107: "lstat", + 108: "fstat", + 109: "olduname", + 110: "iopl", + 111: "vhangup", + 112: "idle", + 113: "vm86old", + 114: "wait4", + 115: "swapoff", + 116: "sysinfo", + 117: "ipc", + 118: "fsync", + 119: "sigreturn", + 120: "clone", + 121: "setdomainname", + 122: "uname", + 123: "modify_ldt", + 124: "adjtimex", + 125: "mprotect", + 126: "sigprocmask", + 127: "create_module", + 128: "init_module", + 129: "delete_module", + 130: "get_kernel_syms", + 131: "quotactl", + 132: "getpgid", + 133: "fchdir", + 134: "bdflush", + 135: "sysfs", + 136: "personality", + 138: "setfsuid", + 139: "setfsgid", + 140: "_llseek", + 141: "getdents", + 142: "_newselect", + 143: "flock", + 144: "msync", + 145: "readv", + 146: "writev", + 147: "getsid", + 148: "fdatasync", + 149: "_sysctl", + 150: "mlock", + 151: "munlock", + 152: "mlockall", + 153: "munlockall", + 154: "sched_setparam", + 155: 
"sched_getparam", + 156: "sched_setscheduler", + 157: "sched_getscheduler", + 158: "sched_yield", + 159: "sched_get_priority_max", + 160: "sched_get_priority_min", + 161: "sched_rr_get_interval", + 162: "nanosleep", + 163: "mremap", + 164: "setresuid", + 165: "getresuid", + 166: "vm86", + 167: "query_module", + 168: "poll", + 169: "nfsservctl", + 170: "setresgid", + 171: "getresgid", + 172: "prctl", + 173: "rt_sigreturn", + 174: "rt_sigaction", + 175: "rt_sigprocmask", + 176: "rt_sigpending", + 177: "rt_sigtimedwait", + 178: "rt_sigqueueinfo", + 179: "rt_sigsuspend", + 180: "pread64", + 181: "pwrite64", + 182: "chown", + 183: "getcwd", + 184: "capget", + 185: "capset", + 186: "sigaltstack", + 187: "sendfile", + 188: "getpmsg", + 190: "vfork", + 191: "ugetrlimit", + 192: "mmap2", + 193: "truncate64", + 194: "ftruncate64", + 195: "stat64", + 196: "lstat64", + 197: "fstat64", + 198: "lchown32", + 199: "getuid32", + 200: "getgid32", + 201: "geteuid32", + 202: "getegid32", + 203: "setreuid32", + 204: "setregid32", + 205: "getgroups32", + 206: "setgroups32", + 207: "fchown32", + 208: "setresuid32", + 209: "getresuid32", + 210: "setresgid32", + 211: "getresgid32", + 212: "chown32", + 213: "setuid32", + 214: "setgid32", + 215: "setfsuid32", + 216: "setfsgid32", + 217: "pivot_root", + 218: "mincore", + 219: "madvise", + 220: "getdents64", + 221: "fcntl64", + 224: "gettid", + 225: "readahead", + 226: "setxattr", + 227: "lsetxattr", + 228: "fsetxattr", + 229: "getxattr", + 230: "lgetxattr", + 231: "fgetxattr", + 232: "listxattr", + 233: "llistxattr", + 234: "flistxattr", + 235: "removexattr", + 236: "lremovexattr", + 237: "fremovexattr", + 238: "tkill", + 239: "sendfile64", + 240: "futex", + 241: "sched_setaffinity", + 242: "sched_getaffinity", + 243: "set_thread_area", + 244: "get_thread_area", + 245: "io_setup", + 246: "io_destroy", + 247: "io_getevents", + 248: "io_submit", + 249: "io_cancel", + 250: "fadvise64", + 252: "exit_group", + 253: "lookup_dcookie", + 254: 
"epoll_create", + 255: "epoll_ctl", + 256: "epoll_wait", + 257: "remap_file_pages", + 258: "set_tid_address", + 259: "timer_create", + 260: "timer_settime", + 261: "timer_gettime", + 262: "timer_getoverrun", + 263: "timer_delete", + 264: "clock_settime", + 265: "clock_gettime", + 266: "clock_getres", + 267: "clock_nanosleep", + 268: "statfs64", + 269: "fstatfs64", + 270: "tgkill", + 271: "utimes", + 272: "fadvise64_64", + 274: "mbind", + 275: "get_mempolicy", + 276: "set_mempolicy", + 277: "mq_open", + 278: "mq_unlink", + 279: "mq_timedsend", + 280: "mq_timedreceive", + 281: "mq_notify", + 282: "mq_getsetattr", + 283: "kexec_load", + 284: "waitid", + 286: "add_key", + 287: "request_key", + 288: "keyctl", + 289: "ioprio_set", + 290: "ioprio_get", + 291: "inotify_init", + 292: "inotify_add_watch", + 293: "inotify_rm_watch", + 294: "migrate_pages", + 295: "openat", + 296: "mkdirat", + 297: "mknodat", + 298: "fchownat", + 299: "futimesat", + 300: "fstatat64", + 301: "unlinkat", + 302: "renameat", + 303: "linkat", + 304: "symlinkat", + 305: "readlinkat", + 306: "fchmodat", + 307: "faccessat", + 308: "pselect6", + 309: "ppoll", + 310: "unshare", + 311: "set_robust_list", + 312: "get_robust_list", + 313: "splice", + 314: "sync_file_range", + 315: "tee", + 316: "vmsplice", + 317: "move_pages", + 318: "getcpu", + 319: "epoll_pwait", + 320: "utimensat", + 321: "signalfd", + 322: "timerfd_create", + 323: "eventfd", + 324: "fallocate", + 325: "timerfd_settime", + 326: "timerfd_gettime", + 327: "signalfd4", + 328: "eventfd2", + 329: "epoll_create1", + 330: "dup3", + 331: "pipe2", + 332: "inotify_init1", + 333: "preadv", + 334: "pwritev", + 335: "rt_tgsigqueueinfo", + 336: "perf_event_open", + 337: "recvmmsg", + 338: "fanotify_init", + 339: "fanotify_mark", + 340: "prlimit64", + 341: "name_to_handle_at", + 342: "open_by_handle_at", + 343: "clock_adjtime", + 344: "syncfs", + 345: "sendmmsg", + 346: "setns", + 347: "process_vm_readv", + 348: "process_vm_writev", + 349: "kcmp", + 
350: "finit_module", + 351: "sched_setattr", + 352: "sched_getattr", + 353: "renameat2", + 354: "seccomp", + 355: "getrandom", + 356: "memfd_create", + 357: "bpf", + 358: "execveat", + 359: "socket", + 360: "socketpair", + 361: "bind", + 362: "connect", + 363: "listen", + 364: "accept4", + 365: "getsockopt", + 366: "setsockopt", + 367: "getsockname", + 368: "getpeername", + 369: "sendto", + 370: "sendmsg", + 371: "recvfrom", + 372: "recvmsg", + 373: "shutdown", + 374: "userfaultfd", + 375: "membarrier", + 376: "mlock2", + 377: "copy_file_range", + 378: "preadv2", + 379: "pwritev2", + 380: "pkey_mprotect", + 381: "pkey_alloc", + 382: "pkey_free", + 383: "statx", + 384: "arch_prctl", + 385: "io_pgetevents", + 386: "rseq", + 393: "semget", + 394: "semctl", + 395: "shmget", + 396: "shmctl", + 397: "shmat", + 398: "shmdt", + 399: "msgget", + 400: "msgsnd", + 401: "msgrcv", + 402: "msgctl", + 403: "clock_gettime64", + 404: "clock_settime64", + 405: "clock_adjtime64", + 406: "clock_getres_time64", + 407: "clock_nanosleep_time64", + 408: "timer_gettime64", + 409: "timer_settime64", + 410: "timerfd_gettime64", + 411: "timerfd_settime64", + 412: "utimensat_time64", + 413: "pselect6_time64", + 414: "ppoll_time64", + 416: "io_pgetevents_time64", + 417: "recvmmsg_time64", + 418: "mq_timedsend_time64", + 419: "mq_timedreceive_time64", + 420: "semtimedop_time64", + 421: "rt_sigtimedwait_time64", + 422: "futex_time64", + 423: "sched_rr_get_interval_time64", + 424: "pidfd_send_signal", + 425: "io_uring_setup", + 426: "io_uring_enter", + 427: "io_uring_register", + 428: "open_tree", + 429: "move_mount", + 430: "fsopen", + 431: "fsconfig", + 432: "fsmount", + 433: "fspick", + 434: "pidfd_open", + 435: "clone3", + 436: "close_range", + 437: "openat2", + 438: "pidfd_getfd", + 439: "faccessat2", + 440: "process_madvise", + 441: "epoll_pwait2", + 442: "mount_setattr", + 443: "quotactl_fd", + 444: "landlock_create_ruleset", + 445: "landlock_add_rule", + 446: "landlock_restrict_self", + 
447: "memfd_secret", + 448: "process_mrelease", + 449: "futex_waitv", + 450: "set_mempolicy_home_node", +} + + +Sys_arm = { + 0: "restart_syscall", + 1: "exit", + 2: "fork", + 3: "read", + 4: "write", + 5: "open", + 6: "close", + 8: "creat", + 9: "link", + 10: "unlink", + 11: "execve", + 12: "chdir", + 13: "time", + 14: "mknod", + 15: "chmod", + 16: "lchown", + 19: "lseek", + 20: "getpid", + 21: "mount", + 23: "setuid", + 24: "getuid", + 26: "ptrace", + 29: "pause", + 33: "access", + 34: "nice", + 36: "sync", + 37: "kill", + 38: "rename", + 39: "mkdir", + 40: "rmdir", + 41: "dup", + 42: "pipe", + 43: "times", + 45: "brk", + 46: "setgid", + 47: "getgid", + 49: "geteuid", + 50: "getegid", + 51: "acct", + 52: "umount2", + 54: "ioctl", + 55: "fcntl", + 57: "setpgid", + 60: "umask", + 61: "chroot", + 62: "ustat", + 63: "dup2", + 64: "getppid", + 65: "getpgrp", + 66: "setsid", + 67: "sigaction", + 70: "setreuid", + 71: "setregid", + 72: "sigsuspend", + 73: "sigpending", + 74: "sethostname", + 75: "setrlimit", + 77: "getrusage", + 78: "gettimeofday", + 79: "settimeofday", + 80: "getgroups", + 81: "setgroups", + 83: "symlink", + 85: "readlink", + 86: "uselib", + 87: "swapon", + 88: "reboot", + 91: "munmap", + 92: "truncate", + 93: "ftruncate", + 94: "fchmod", + 95: "fchown", + 96: "getpriority", + 97: "setpriority", + 99: "statfs", + 100: "fstatfs", + 103: "syslog", + 104: "setitimer", + 105: "getitimer", + 106: "stat", + 107: "lstat", + 108: "fstat", + 111: "vhangup", + 114: "wait4", + 115: "swapoff", + 116: "sysinfo", + 118: "fsync", + 119: "sigreturn", + 120: "clone", + 121: "setdomainname", + 122: "uname", + 124: "adjtimex", + 125: "mprotect", + 126: "sigprocmask", + 128: "init_module", + 129: "delete_module", + 131: "quotactl", + 132: "getpgid", + 133: "fchdir", + 134: "bdflush", + 135: "sysfs", + 136: "personality", + 138: "setfsuid", + 139: "setfsgid", + 140: "_llseek", + 141: "getdents", + 142: "_newselect", + 143: "flock", + 144: "msync", + 145: "readv", + 146: 
"writev", + 147: "getsid", + 148: "fdatasync", + 149: "_sysctl", + 150: "mlock", + 151: "munlock", + 152: "mlockall", + 153: "munlockall", + 154: "sched_setparam", + 155: "sched_getparam", + 156: "sched_setscheduler", + 157: "sched_getscheduler", + 158: "sched_yield", + 159: "sched_get_priority_max", + 160: "sched_get_priority_min", + 161: "sched_rr_get_interval", + 162: "nanosleep", + 163: "mremap", + 164: "setresuid", + 165: "getresuid", + 168: "poll", + 169: "nfsservctl", + 170: "setresgid", + 171: "getresgid", + 172: "prctl", + 173: "rt_sigreturn", + 174: "rt_sigaction", + 175: "rt_sigprocmask", + 176: "rt_sigpending", + 177: "rt_sigtimedwait", + 178: "rt_sigqueueinfo", + 179: "rt_sigsuspend", + 180: "pread64", + 181: "pwrite64", + 182: "chown", + 183: "getcwd", + 184: "capget", + 185: "capset", + 186: "sigaltstack", + 187: "sendfile", + 190: "vfork", + 191: "ugetrlimit", + 192: "mmap2", + 193: "truncate64", + 194: "ftruncate64", + 195: "stat64", + 196: "lstat64", + 197: "fstat64", + 198: "lchown32", + 199: "getuid32", + 200: "getgid32", + 201: "geteuid32", + 202: "getegid32", + 203: "setreuid32", + 204: "setregid32", + 205: "getgroups32", + 206: "setgroups32", + 207: "fchown32", + 208: "setresuid32", + 209: "getresuid32", + 210: "setresgid32", + 211: "getresgid32", + 212: "chown32", + 213: "setuid32", + 214: "setgid32", + 215: "setfsuid32", + 216: "setfsgid32", + 217: "getdents64", + 218: "pivot_root", + 219: "mincore", + 220: "madvise", + 221: "fcntl64", + 224: "gettid", + 225: "readahead", + 226: "setxattr", + 227: "lsetxattr", + 228: "fsetxattr", + 229: "getxattr", + 230: "lgetxattr", + 231: "fgetxattr", + 232: "listxattr", + 233: "llistxattr", + 234: "flistxattr", + 235: "removexattr", + 236: "lremovexattr", + 237: "fremovexattr", + 238: "tkill", + 239: "sendfile64", + 240: "futex", + 241: "sched_setaffinity", + 242: "sched_getaffinity", + 243: "io_setup", + 244: "io_destroy", + 245: "io_getevents", + 246: "io_submit", + 247: "io_cancel", + 248: 
"exit_group", + 249: "lookup_dcookie", + 250: "epoll_create", + 251: "epoll_ctl", + 252: "epoll_wait", + 253: "remap_file_pages", + 256: "set_tid_address", + 257: "timer_create", + 258: "timer_settime", + 259: "timer_gettime", + 260: "timer_getoverrun", + 261: "timer_delete", + 262: "clock_settime", + 263: "clock_gettime", + 264: "clock_getres", + 265: "clock_nanosleep", + 266: "statfs64", + 267: "fstatfs64", + 268: "tgkill", + 269: "utimes", + 270: "arm_fadvise64_64", + 271: "pciconfig_iobase", + 272: "pciconfig_read", + 273: "pciconfig_write", + 274: "mq_open", + 275: "mq_unlink", + 276: "mq_timedsend", + 277: "mq_timedreceive", + 278: "mq_notify", + 279: "mq_getsetattr", + 280: "waitid", + 281: "socket", + 282: "bind", + 283: "connect", + 284: "listen", + 285: "accept", + 286: "getsockname", + 287: "getpeername", + 288: "socketpair", + 289: "send", + 290: "sendto", + 291: "recv", + 292: "recvfrom", + 293: "shutdown", + 294: "setsockopt", + 295: "getsockopt", + 296: "sendmsg", + 297: "recvmsg", + 298: "semop", + 299: "semget", + 300: "semctl", + 301: "msgsnd", + 302: "msgrcv", + 303: "msgget", + 304: "msgctl", + 305: "shmat", + 306: "shmdt", + 307: "shmget", + 308: "shmctl", + 309: "add_key", + 310: "request_key", + 311: "keyctl", + 312: "semtimedop", + 314: "ioprio_set", + 315: "ioprio_get", + 316: "inotify_init", + 317: "inotify_add_watch", + 318: "inotify_rm_watch", + 319: "mbind", + 320: "get_mempolicy", + 321: "set_mempolicy", + 322: "openat", + 323: "mkdirat", + 324: "mknodat", + 325: "fchownat", + 326: "futimesat", + 327: "fstatat64", + 328: "unlinkat", + 329: "renameat", + 330: "linkat", + 331: "symlinkat", + 332: "readlinkat", + 333: "fchmodat", + 334: "faccessat", + 335: "pselect6", + 336: "ppoll", + 337: "unshare", + 338: "set_robust_list", + 339: "get_robust_list", + 340: "splice", + 341: "sync_file_range2", + 342: "tee", + 343: "vmsplice", + 344: "move_pages", + 345: "getcpu", + 346: "epoll_pwait", + 347: "kexec_load", + 348: "utimensat", + 349: 
"signalfd", + 350: "timerfd_create", + 351: "eventfd", + 352: "fallocate", + 353: "timerfd_settime", + 354: "timerfd_gettime", + 355: "signalfd4", + 356: "eventfd2", + 357: "epoll_create1", + 358: "dup3", + 359: "pipe2", + 360: "inotify_init1", + 361: "preadv", + 362: "pwritev", + 363: "rt_tgsigqueueinfo", + 364: "perf_event_open", + 365: "recvmmsg", + 366: "accept4", + 367: "fanotify_init", + 368: "fanotify_mark", + 369: "prlimit64", + 370: "name_to_handle_at", + 371: "open_by_handle_at", + 372: "clock_adjtime", + 373: "syncfs", + 374: "sendmmsg", + 375: "setns", + 376: "process_vm_readv", + 377: "process_vm_writev", + 378: "kcmp", + 379: "finit_module", + 380: "sched_setattr", + 381: "sched_getattr", + 382: "renameat2", + 383: "seccomp", + 384: "getrandom", + 385: "memfd_create", + 386: "bpf", + 387: "execveat", + 388: "userfaultfd", + 389: "membarrier", + 390: "mlock2", + 391: "copy_file_range", + 392: "preadv2", + 393: "pwritev2", + 394: "pkey_mprotect", + 395: "pkey_alloc", + 396: "pkey_free", + 397: "statx", + 398: "rseq", + 399: "io_pgetevents", + 400: "migrate_pages", + 401: "kexec_file_load", + 403: "clock_gettime64", + 404: "clock_settime64", + 405: "clock_adjtime64", + 406: "clock_getres_time64", + 407: "clock_nanosleep_time64", + 408: "timer_gettime64", + 409: "timer_settime64", + 410: "timerfd_gettime64", + 411: "timerfd_settime64", + 412: "utimensat_time64", + 413: "pselect6_time64", + 414: "ppoll_time64", + 416: "io_pgetevents_time64", + 417: "recvmmsg_time64", + 418: "mq_timedsend_time64", + 419: "mq_timedreceive_time64", + 420: "semtimedop_time64", + 421: "rt_sigtimedwait_time64", + 422: "futex_time64", + 423: "sched_rr_get_interval_time64", + 424: "pidfd_send_signal", + 425: "io_uring_setup", + 426: "io_uring_enter", + 427: "io_uring_register", + 428: "open_tree", + 429: "move_mount", + 430: "fsopen", + 431: "fsconfig", + 432: "fsmount", + 433: "fspick", + 434: "pidfd_open", + 435: "clone3", + 436: "close_range", + 437: "openat2", + 438: 
"pidfd_getfd", + 439: "faccessat2", + 440: "process_madvise", + 441: "epoll_pwait2", + 442: "mount_setattr", + 443: "quotactl_fd", + 444: "landlock_create_ruleset", + 445: "landlock_add_rule", + 446: "landlock_restrict_self", + 448: "process_mrelease", + 449: "futex_waitv", + 450: "set_mempolicy_home_node", +} diff --git a/capa/main.py b/capa/main.py index ae8421560..ab321fde0 100644 --- a/capa/main.py +++ b/capa/main.py @@ -20,7 +20,7 @@ import itertools import contextlib import collections -from typing import Any, Dict, List, Tuple, Callable, Optional +from typing import Any, Dict, List, Tuple, Callable, Optional, Union from pathlib import Path import halo @@ -78,6 +78,7 @@ FORMAT_RESULT, ) from capa.features.address import NO_ADDRESS, Address +from capa.features.extractors.viv.syscall import resolve_syscall_functions from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor RULES_PATH_DEFAULT_STRING = "(embedded rules)" @@ -98,6 +99,7 @@ E_INVALID_FILE_OS = 18 E_UNSUPPORTED_IDA_VERSION = 19 E_UNSUPPORTED_GHIDRA_VERSION = 20 +UNSUPPORTED = "UNSUPPORTED" logger = logging.getLogger("capa") @@ -390,10 +392,13 @@ def is_supported_format(sample: Path) -> bool: return len(list(capa.features.extractors.common.extract_format(taste))) == 1 -def is_supported_arch(sample: Path) -> bool: +def is_supported_arch(sample: Path) -> Union[str, int, float, bytes]: buf = sample.read_bytes() - return len(list(capa.features.extractors.common.extract_arch(buf))) == 1 + arch = list(capa.features.extractors.common.extract_arch(buf)) + if len(arch) != 1: + return UNSUPPORTED + return arch[0][0].value def get_arch(sample: Path) -> str: @@ -532,11 +537,13 @@ def get_extractor( UnsupportedArchError UnsupportedOSError """ + arch = None if format_ not in (FORMAT_SC32, FORMAT_SC64): if not is_supported_format(path): raise UnsupportedFormatError() - if not is_supported_arch(path): + arch = is_supported_arch(path) + if arch == UNSUPPORTED: raise 
UnsupportedArchError() if os_ == OS_AUTO and not is_supported_os(path): @@ -581,8 +588,6 @@ def get_extractor( return capa.features.extractors.pefile.PefileFeatureExtractor(path) elif backend == BACKEND_VIV: - import capa.features.extractors.viv.extractor - with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): vw = get_workspace(path, format_, sigpaths) @@ -596,6 +601,13 @@ def get_extractor( else: logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace") + if arch == "ARM": + import capa.features.extractors.viv.extractor + + return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_, arm=True) + + import capa.features.extractors.viv.extractor + return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_) else: @@ -1279,6 +1291,10 @@ def main(argv: Optional[List[str]] = None): meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor) + # for ELF handle staticly linked library + if format_ == FORMAT_ELF: + resolve_syscall_functions(extractor) + capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet) meta.analysis.feature_counts = counts["feature_counts"]