diff --git a/Cpp2IL.Core/Il2CppApiFunctions/BaseKeyFunctionAddresses.cs b/Cpp2IL.Core/Il2CppApiFunctions/BaseKeyFunctionAddresses.cs index bb5c7fa5..65aed934 100644 --- a/Cpp2IL.Core/Il2CppApiFunctions/BaseKeyFunctionAddresses.cs +++ b/Cpp2IL.Core/Il2CppApiFunctions/BaseKeyFunctionAddresses.cs @@ -146,7 +146,7 @@ private void FindThunks() if (il2cpp_type_get_object != 0) { - Logger.Verbose("\t\tMapping il2cpp_resolve_icall to Reflection::GetTypeObject..."); + Logger.Verbose("\t\tMapping il2cpp_type_get_object to Reflection::GetTypeObject..."); il2cpp_vm_reflection_get_type_object = FindFunctionThisIsAThunkOf(il2cpp_type_get_object); Logger.VerboseNewline($"Found at 0x{il2cpp_vm_reflection_get_type_object:X}"); } diff --git a/Cpp2IL.InstructionSets.All/AllInstructionSets.cs b/Cpp2IL.InstructionSets.All/AllInstructionSets.cs index 7b26cd02..90056b9f 100644 --- a/Cpp2IL.InstructionSets.All/AllInstructionSets.cs +++ b/Cpp2IL.InstructionSets.All/AllInstructionSets.cs @@ -1,4 +1,5 @@ using Cpp2IL.Core.Api; +using Cpp2IL.InstructionSets.ArmV7; using Cpp2IL.InstructionSets.ArmV8; using Cpp2IL.InstructionSets.Wasm; using Cpp2IL.InstructionSets.X86; @@ -10,6 +11,7 @@ public static class AllInstructionSets public static void Register() { X86InstructionSet.RegisterInstructionSet(); + ArmV7InstructionSet.RegisterInstructionSet(); ArmV8InstructionSet.RegisterInstructionSet(); WasmInstructionSet.RegisterInstructionSet(); OutputFormatRegistry.Register(); diff --git a/Cpp2IL.InstructionSets.All/Cpp2IL.InstructionSets.All.csproj b/Cpp2IL.InstructionSets.All/Cpp2IL.InstructionSets.All.csproj index d87750f3..0d6a089f 100644 --- a/Cpp2IL.InstructionSets.All/Cpp2IL.InstructionSets.All.csproj +++ b/Cpp2IL.InstructionSets.All/Cpp2IL.InstructionSets.All.csproj @@ -6,6 +6,7 @@ + diff --git a/Cpp2IL.InstructionSets.ArmV7/ArmV7InstructionSet.cs b/Cpp2IL.InstructionSets.ArmV7/ArmV7InstructionSet.cs new file mode 100644 index 00000000..abbf5092 --- /dev/null +++ 
b/Cpp2IL.InstructionSets.ArmV7/ArmV7InstructionSet.cs
@@ -0,0 +1,86 @@
+using System.Text;
+using Cpp2IL.Core.Api;
+using Cpp2IL.Core.Il2CppApiFunctions;
+using Cpp2IL.Core.ISIL;
+using Cpp2IL.Core.Model.Contexts;
+using LibCpp2IL;
+
+namespace Cpp2IL.InstructionSets.ArmV7;
+
+/// <summary>
+/// ARMv7 instruction set support: raw method bytes, key function lookup and assembly printing.
+/// ISIL conversion (<see cref="GetIsilFromMethod"/>) is not implemented yet.
+/// </summary>
+public class ArmV7InstructionSet : Cpp2IlInstructionSet
+{
+    /// <summary>Registers this instruction set under <see cref="DefaultInstructionSets.ARM_V7"/>.</summary>
+    public static void RegisterInstructionSet()
+    {
+        InstructionSetRegistry.RegisterInstructionSet(DefaultInstructionSets.ARM_V7);
+    }
+
+    /// <summary>
+    /// Returns the raw bytes of the method's body.
+    /// </summary>
+    /// <remarks>
+    /// Tries <see cref="ArmV7Utils.TryGetMethodBodyBytesFast"/> first and otherwise uses the end
+    /// address reported by <see cref="ArmV7Utils.DisassembleManagedMethod(ulong, out ulong, int)"/>.
+    /// The attribute-generator case is detected from the runtime type of <paramref name="context"/>;
+    /// <paramref name="isAttributeGenerator"/> is not read.
+    /// </remarks>
+    public override Memory GetRawBytesForMethod(MethodAnalysisContext context, bool isAttributeGenerator)
+    {
+        if (ArmV7Utils.TryGetMethodBodyBytesFast(context.UnderlyingPointer, context is AttributeGeneratorMethodAnalysisContext) is { } ret)
+            return ret;
+
+        // Only the end address is needed here; the returned instructions are discarded.
+        ArmV7Utils.DisassembleManagedMethod(context.UnderlyingPointer, out var endVirtualAddress);
+
+        var start = (int)context.AppContext.Binary.MapVirtualAddressToRaw(context.UnderlyingPointer);
+        var end = (int)context.AppContext.Binary.MapVirtualAddressToRaw(endVirtualAddress);
+
+        return context.AppContext.Binary.GetRawBinaryContent().AsMemory(start, end - start);
+    }
+
+    /// <summary>Not implemented for ARMv7.</summary>
+    /// <exception cref="NotImplementedException">Always thrown.</exception>
+    public override List GetIsilFromMethod(MethodAnalysisContext context)
+    {
+        throw new NotImplementedException();
+    }
+
+    /// <summary>Creates the ARMv7 key function address finder.</summary>
+    public override BaseKeyFunctionAddresses CreateKeyFunctionAddressesInstance()
+    {
+        return new ArmV7KeyFunctionAddresses();
+    }
+
+    /// <summary>
+    /// Disassembles <c>context.RawBytes</c> and returns one line per instruction in the form
+    /// <c>0x{address:X} {instruction}</c>.
+    /// </summary>
+    public override unsafe string PrintAssembly(MethodAnalysisContext context)
+    {
+        var sb = new StringBuilder();
+
+        using (ArmV7Utils.Disassembler.AllocInstruction(out var instruction))
+        {
+            fixed (byte* code = context.RawBytes.Span)
+            {
+                var size = (nuint)context.RawBytes.Length;
+                var address = context.UnderlyingPointer;
+
+                // NOTE(review): address is handed to UnsafeIterate by pointer. If it follows Capstone's
+                // cs_disasm_iter contract it has already been advanced past the decoded instruction when
+                // it is printed below, i.e. the printed value may be the NEXT instruction's address - confirm.
+                while (ArmV7Utils.Disassembler.UnsafeIterate(&code, &size, &address, instruction))
+                {
+                    // Each instruction is newline-terminated, so no extra separator is needed.
+                    sb.Append("0x").Append(address.ToString("X")).Append(" ").AppendLine(instruction->ToString());
+                }
+            }
+        }
+
+        return sb.ToString();
+    }
+}
diff
--git a/Cpp2IL.InstructionSets.ArmV7/ArmV7KeyFunctionAddresses.cs b/Cpp2IL.InstructionSets.ArmV7/ArmV7KeyFunctionAddresses.cs
new file mode 100644
index 00000000..6f037962
--- /dev/null
+++ b/Cpp2IL.InstructionSets.ArmV7/ArmV7KeyFunctionAddresses.cs
@@ -0,0 +1,174 @@
+using CapstoneSharp.Arm;
+using Cpp2IL.Core.Il2CppApiFunctions;
+using Cpp2IL.Core.Logging;
+using LibCpp2IL;
+using LibCpp2IL.Reflection;
+
+namespace Cpp2IL.InstructionSets.ArmV7;
+
+/// <summary>
+/// ARMv7 key function address finder. Works by scanning the disassembled primary executable
+/// section for B/BL instructions.
+/// </summary>
+public class ArmV7KeyFunctionAddresses : BaseKeyFunctionAddresses
+{
+    //Filled on first use by DisassembleTextSection so the section is only disassembled once
+    private List? _cachedDisassembledBytes;
+
+    /// <summary>Returns the disassembly of the primary executable section, computing it on first call.</summary>
+    private List DisassembleTextSection()
+    {
+        if (_cachedDisassembledBytes == null)
+        {
+            var toDisasm = LibCpp2IlMain.Binary!.GetEntirePrimaryExecutableSection();
+            _cachedDisassembledBytes = ArmV7Utils.Disassembler.Iterate(toDisasm, LibCpp2IlMain.Binary.GetVirtualAddressOfPrimaryExecutableSection()).ToList();
+        }
+
+        return _cachedDisassembledBytes;
+    }
+
+    /// <summary>
+    /// Yields the probable start addresses of thunks that branch (B/BL) to <paramref name="addr"/>.
+    /// </summary>
+    /// <param name="addr">Branch target to search for.</param>
+    /// <param name="maxBytesBack">How many bytes (4 per instruction) to walk back from the branch looking for the thunk start.</param>
+    /// <param name="addressesToIgnore">Instruction addresses to skip, both as candidate branches and while walking back.</param>
+    protected override IEnumerable FindAllThunkFunctions(ulong addr, uint maxBytesBack = 0, params ulong[] addressesToIgnore)
+    {
+        //Disassemble .text
+        var disassembly = DisassembleTextSection();
+
+        //Walk by index so the position of each matching branch is known without searching the list again
+        for (var idx = 0; idx < disassembly.Count; idx++)
+        {
+            var matchingJmp = disassembly[idx];
+
+            //Only jumps to the target address are of interest
+            if (!matchingJmp.IsBranchingTo((int)addr)) continue;
+            if (addressesToIgnore.Contains(matchingJmp.Address)) continue;
+
+            var backtrack = 0;
+
+            do
+            {
+                //About the only way we have of checking for a thunk is if there is an all-zero instruction or another unconditional branch just before this
+                //Or a ret, but that's less reliable.
+                //if so, this is probably a thunk.
+                if (idx - backtrack > 0)
+                {
+                    var prevInstruction = disassembly[idx - backtrack - 1];
+
+                    if (addressesToIgnore.Contains(prevInstruction.Address))
+                    {
+                        backtrack++;
+                        continue;
+                    }
+
+                    if (prevInstruction.IsSkippedData && prevInstruction.Bytes.IsAllZero())
+                    {
+                        //All-zero instructions are a match
+                        yield return matchingJmp.Address - (ulong)(backtrack * 4);
+                        break;
+                    }
+
+                    if (prevInstruction.Id is CapstoneArmInstructionId.STR)
+                    {
+                        //STR instructions are a deal breaker - this means we're storing something to memory, so it's not a simple thunk
+                        break;
+                    }
+
+                    if (prevInstruction.Id is CapstoneArmInstructionId.B or CapstoneArmInstructionId.BL)
+                    {
+                        //Previous branches are a match
+                        yield return matchingJmp.Address - (ulong)(backtrack * 4);
+                        break;
+                    }
+                }
+
+                //We're working in the .text section here so we have few symbols, so there's no point looking for the previous one.
+
+                backtrack++;
+            } while (backtrack * 4 < maxBytesBack);
+        }
+    }
+
+    /// <summary>
+    /// Finds il2cpp::vm::Object::IsInst as the target of the last BL inside System.Type::IsInstanceOfType.
+    /// </summary>
+    /// <returns>The address found, or 0 if the type/method is missing or the method contains no BL.</returns>
+    protected override ulong GetObjectIsInstFromSystemType()
+    {
+        Logger.Verbose("\tTrying to use System.Type::IsInstanceOfType to find il2cpp::vm::Object::IsInst...");
+        var typeIsInstanceOfType = LibCpp2IlReflection.GetType("Type", "System")?.Methods?.FirstOrDefault(m => m.Name == "IsInstanceOfType");
+        if (typeIsInstanceOfType == null)
+        {
+            Logger.VerboseNewline("Type or method not found, aborting.");
+            return 0;
+        }
+
+        //IsInstanceOfType is a very simple ICall, that looks like this:
+        //      Il2CppClass* klass = vm::Class::FromIl2CppType(type->type.type);
+        //      return il2cpp::vm::Object::IsInst(obj, klass) != NULL;
+        //The last call is to Object::IsInst
+
+        Logger.Verbose($"IsInstanceOfType found at 0x{typeIsInstanceOfType.MethodPointer:X}...");
+        var instructions = ArmV7Utils.DisassembleManagedMethod(typeIsInstanceOfType.MethodPointer);
+
+        var lastCall = instructions.LastOrDefault(i => i.Id == CapstoneArmInstructionId.BL);
+
+        if (lastCall == null)
+        {
+            Logger.VerboseNewline("Method does not match expected signature. Aborting.");
+            return 0;
+        }
+
+        var target = lastCall.GetBranchTarget();
+        Logger.VerboseNewline($"Success. IsInst found at 0x{target:X}");
+        //Go through uint so an address with the top bit set is zero-extended rather than sign-extended
+        return (ulong)(uint)target;
+    }
+
+    /// <summary>
+    /// Returns the target of the first B (or, when <paramref name="prioritiseCall"/> is set, BL) in the
+    /// function at <paramref name="thunkPtr"/>, falling back to the other kind of branch.
+    /// </summary>
+    /// <returns>The branch target, or 0 if no B/BL is found or reading its target fails.</returns>
+    protected override ulong FindFunctionThisIsAThunkOf(ulong thunkPtr, bool prioritiseCall = false)
+    {
+        var instructions = ArmV7Utils.DisassembleFunction(thunkPtr);
+
+        try
+        {
+            var target = prioritiseCall ? CapstoneArmInstructionId.BL : CapstoneArmInstructionId.B;
+            var matchingCall = instructions.FirstOrDefault(i => i.Id == target);
+
+            if (matchingCall == null)
+            {
+                target = target == CapstoneArmInstructionId.BL ? CapstoneArmInstructionId.B : CapstoneArmInstructionId.BL;
+                matchingCall = instructions.FirstOrDefault(i => i.Id == target);
+            }
+
+            //Finding no branch of either kind is an expected outcome, so report it without throwing
+            if (matchingCall == null)
+                return 0;
+
+            //Go through uint so an address with the top bit set is zero-extended rather than sign-extended
+            return (ulong)(uint)matchingCall.GetBranchTarget();
+        }
+        catch (Exception)
+        {
+            //Best effort: any failure while reading the branch target is treated as "not found"
+            return 0;
+        }
+    }
+
+    /// <summary>Counts the B/BL instructions in the primary executable section that target <paramref name="toWhere"/>.</summary>
+    protected override int GetCallerCount(ulong toWhere)
+    {
+        //Disassemble .text
+        var disassembly = DisassembleTextSection();
+
+        //Find all jumps to the target address
+        return disassembly.Count(i => i.IsBranchingTo((int)toWhere));
+    }
+}
diff --git a/Cpp2IL.InstructionSets.ArmV7/ArmV7Utils.cs b/Cpp2IL.InstructionSets.ArmV7/ArmV7Utils.cs
new file mode 100644
index 00000000..5f922c1c
--- /dev/null
+++ b/Cpp2IL.InstructionSets.ArmV7/ArmV7Utils.cs
@@ -0,0 +1,154 @@
+using System.Runtime.InteropServices;
+using CapstoneSharp.Arm;
+using Cpp2IL.Core.Utils;
+using LibCpp2IL;
+
+namespace Cpp2IL.InstructionSets.ArmV7;
+
+/// <summary>Shared Capstone-based disassembly helpers for the ARMv7 instruction set.</summary>
+internal static class ArmV7Utils
+{
+    private static CapstoneArmDisassembler? _disassembler;
+
+    // TODO dispose this
+    /// <summary>Disassembler instance, created on first access with instruction details and skip-data enabled.</summary>
+    public static CapstoneArmDisassembler Disassembler => _disassembler ??= new CapstoneArmDisassembler
+    {
+        EnableInstructionDetails = true, EnableSkipData = true,
+    };
+
+    /// <summary>Returns true if every byte in <paramref name="span"/> is zero (an empty span counts as all-zero).</summary>
+    public static bool IsAllZero(this ReadOnlySpan span)
+    {
+        // Check every byte: reading only a fixed-size prefix would ignore the tail of longer spans.
+        foreach (var b in span)
+        {
+            if (b != 0)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>Returns the immediate of the first operand (the branch target) of a B or BL instruction.</summary>
+    /// <exception cref="InvalidOperationException">The instruction is neither B nor BL.</exception>
+    public static int GetBranchTarget(this CapstoneArmInstruction instruction)
+    {
+        if (instruction.Id is not (CapstoneArmInstructionId.B or CapstoneArmInstructionId.BL))
+            throw new InvalidOperationException("Branch target not available for this instruction, must be a B or BL");
+
+        return instruction.Details.ArchDetails.Operands[0].Immediate;
+    }
+
+    /// <summary>Returns true if the instruction is a B or BL whose target equals <paramref name="toWhere"/>.</summary>
+    public static bool IsBranchingTo(this CapstoneArmInstruction instruction, int toWhere)
+    {
+        if (instruction.Id is not (CapstoneArmInstructionId.B or CapstoneArmInstructionId.BL))
+            return false;
+
+        return instruction.GetBranchTarget() == toWhere;
+    }
+
+    /// <summary>
+    /// Returns the bytes between <paramref name="virtualAddress"/> and the start of the next known function,
+    /// without disassembling anything.
+    /// </summary>
+    /// <returns>
+    /// The method body, or null when there is no next function or when <paramref name="isCAGen"/> is set
+    /// and the body would be longer than 50,000 bytes.
+    /// </returns>
+    public static Memory? TryGetMethodBodyBytesFast(ulong virtualAddress, bool isCAGen)
+    {
+        var startOfNext = MiscUtils.GetAddressOfNextFunctionStart(virtualAddress);
+
+        if (startOfNext <= 0)
+            //We have to fall through to default behavior for the last method because we cannot accurately pinpoint its end
+            return null;
+
+        var length = (startOfNext - virtualAddress);
+        if (isCAGen && length > 50_000)
+            return null;
+
+        var rawStartOfNextMethod = LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(startOfNext);
+
+        var rawStart = LibCpp2IlMain.Binary.MapVirtualAddressToRaw(virtualAddress);
+        //If the next function maps to before this one in the file, take everything up to the end of the binary
+        if (rawStartOfNextMethod < rawStart)
+            rawStartOfNextMethod = LibCpp2IlMain.Binary.RawLength;
+
+        return LibCpp2IlMain.Binary.GetRawBinaryContent().AsMemory((int)rawStart, (int)(rawStartOfNextMethod - rawStart));
+    }
+
+    /// <summary>Disassembles from <paramref name="virtualAddress"/> up to and including the first B instruction, or until <paramref name="count"/> instructions have been read (-1 for no limit).</summary>
+    public static List DisassembleFunction(ulong virtualAddress, int count = -1)
+    {
+        return DisassembleFunction(virtualAddress, out _, count);
+    }
+
+    /// <summary>
+    /// Disassembles from <paramref name="virtualAddress"/> up to and including the first B instruction, or
+    /// until <paramref name="count"/> instructions have been read (-1 for no limit).
+    /// </summary>
+    /// <param name="endVirtualAddress">
+    /// Address just past the last instruction returned (equal to <paramref name="virtualAddress"/> if nothing was decoded).
+    /// </param>
+    public static List DisassembleFunction(ulong virtualAddress, out ulong endVirtualAddress, int count = -1)
+    {
+        // Unmanaged function, look for first b
+        var pos = (int)LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(virtualAddress);
+        var allBytes = LibCpp2IlMain.Binary.GetRawBinaryContent();
+
+        var instructions = new List();
+
+        endVirtualAddress = virtualAddress;
+        foreach (var instruction in Disassembler.Iterate(allBytes.AsSpan(pos), virtualAddress))
+        {
+            instructions.Add(instruction);
+            // Exclusive end, matching the managed-method path (start + byte length), so callers slicing
+            // [start, end) keep the final instruction.
+            endVirtualAddress = instruction.Address + (ulong)instruction.Bytes.Length;
+            // NOTE(review): if conditional branches are also reported with Id B this stops at the first one - confirm.
+            if (instruction.Id == CapstoneArmInstructionId.B) break;
+            if (count != -1 && instructions.Count >= count) break;
+        }
+
+        return instructions;
+    }
+
+    /// <summary>Disassembles a managed method, discarding the end address; see the overload with <c>endVirtualAddress</c>.</summary>
+    public static IEnumerable DisassembleManagedMethod(ulong virtualAddress, int count = -1)
+    {
+        return DisassembleManagedMethod(virtualAddress, out _, count);
+    }
+
+    /// <summary>
+    /// Disassembles the method at <paramref name="virtualAddress"/>, using the start of the next known function
+    /// as its end when one is available and <see cref="DisassembleFunction(ulong, out ulong, int)"/> otherwise.
+    /// </summary>
+    /// <param name="endVirtualAddress">Address just past the last byte considered part of the method.</param>
+    /// <param name="count">Maximum number of instructions to return, or -1 for all.</param>
+    public static IEnumerable DisassembleManagedMethod(ulong virtualAddress, out ulong endVirtualAddress, int count = -1)
+    {
+        var startOfNext = MiscUtils.GetAddressOfNextFunctionStart(virtualAddress);
+
+        // We have to fall through to default behavior for the last method because we cannot accurately pinpoint its end
+        if (startOfNext > 0)
+        {
+            var rawStartOfNextMethod = LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(startOfNext);
+
+            var rawStart = LibCpp2IlMain.Binary.MapVirtualAddressToRaw(virtualAddress);
+            if (rawStartOfNextMethod < rawStart)
+                rawStartOfNextMethod = LibCpp2IlMain.Binary.RawLength;
+
+            var bytes = LibCpp2IlMain.Binary.GetRawBinaryContent().AsMemory((int)rawStart, (int)(rawStartOfNextMethod - rawStart));
+
+            endVirtualAddress = virtualAddress + (ulong)bytes.Length;
+            var instructions = Disassembler.Iterate(bytes, virtualAddress);
+            return count == -1 ? instructions : instructions.Take(count);
+        }
+
+        return DisassembleFunction(virtualAddress, out endVirtualAddress, count);
+    }
+}
diff --git a/Cpp2IL.InstructionSets.ArmV7/Cpp2IL.InstructionSets.ArmV7.csproj b/Cpp2IL.InstructionSets.ArmV7/Cpp2IL.InstructionSets.ArmV7.csproj new file mode 100644 index 00000000..a034d770 --- /dev/null +++ b/Cpp2IL.InstructionSets.ArmV7/Cpp2IL.InstructionSets.ArmV7.csproj @@ -0,0 +1,12 @@ + + + netstandard2.0 + true + true + + + + + + + diff --git a/Cpp2IL.sln b/Cpp2IL.sln index b2102e57..a85acb1c 100644 --- a/Cpp2IL.sln +++ b/Cpp2IL.sln @@ -55,6 +55,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cpp2IL.InstructionSets.ArmV EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cpp2IL.InstructionSets.Wasm", "Cpp2IL.InstructionSets.Wasm\Cpp2IL.InstructionSets.Wasm.csproj", "{69F27003-2D6B-42E3-BB69-C542CA16E0D8}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cpp2IL.InstructionSets.ArmV7", "Cpp2IL.InstructionSets.ArmV7\Cpp2IL.InstructionSets.ArmV7.csproj", "{BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -125,6 +127,10 @@ Global {69F27003-2D6B-42E3-BB69-C542CA16E0D8}.Debug|Any CPU.Build.0 = Debug|Any CPU 
{69F27003-2D6B-42E3-BB69-C542CA16E0D8}.Release|Any CPU.ActiveCfg = Release|Any CPU {69F27003-2D6B-42E3-BB69-C542CA16E0D8}.Release|Any CPU.Build.0 = Release|Any CPU + {BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -135,6 +141,7 @@ Global {CC7781FC-1A0E-441D-9551-412509BE63DF} = {6B0AAAA9-3C29-4AD9-84CB-47DB34134C82} {6B9023CB-8EB3-4738-B079-FD7E7AE76023} = {6B0AAAA9-3C29-4AD9-84CB-47DB34134C82} {69F27003-2D6B-42E3-BB69-C542CA16E0D8} = {6B0AAAA9-3C29-4AD9-84CB-47DB34134C82} + {BBBC3D51-9F2E-4AB3-B4BE-195A01568F1D} = {6B0AAAA9-3C29-4AD9-84CB-47DB34134C82} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {E9A45B2C-AAEF-4D66-ADD6-7DD234DA3F39}