Skip to content

Commit

Permalink
Patch EXTRQ
Browse files Browse the repository at this point in the history
  • Loading branch information
OFFTKP committed Sep 18, 2024
1 parent b42034d commit 75ce774
Show file tree
Hide file tree
Showing 7 changed files with 343 additions and 45 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ set(COMMON src/common/logging/backend.cpp
src/common/polyfill_thread.h
src/common/rdtsc.cpp
src/common/rdtsc.h
src/common/signal_context.h
src/common/signal_context.cpp
src/common/singleton.h
src/common/slot_vector.h
src/common/string_util.cpp
Expand Down
90 changes: 90 additions & 0 deletions src/common/signal_context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/arch.h"
#include "common/assert.h"
#include "common/signal_context.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/ucontext.h>
#endif

namespace Common {

void* GetXmmPointer(void* ctx, u8 index) {
#if defined(_WIN32)
#define CASE(index) \
case index: \
return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low)
#elif defined(__APPLE__)
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index);
#else
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
#endif
switch (index) {
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
default: {
UNREACHABLE_MSG("Invalid XMM register index: {}", index);
return nullptr;
}
}
#undef CASE
}

void* GetRip(void* ctx) {
#if defined(_WIN32)
return &((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip;
#elif defined(__APPLE__)
return &((ucontext_t*)ctx)->uc_mcontext->__ss.__rip;
#else
return &((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP];
#endif
}

void IncrementRip(void* ctx, u64 length) {
void* rip = GetRip(ctx);
u64 new_rip;
memcpy(&new_rip, rip, sizeof(u64));
new_rip += length;
memcpy(rip, &new_rip, sizeof(u64));
}

bool IsWriteError(void* ctx) {
#if defined(_WIN32)
return ((EXCEPTION_POINTERS*)ctx)->ExceptionRecord->ExceptionInformation[0] == 1;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
return ((ucontext_t*)ctx)->uc_mcontext->__es.__err & 0x2;
#elif defined(ARCH_ARM64)
return ((ucontext_t*)ctx)->uc_mcontext->__es.__esr & 0x40;
#endif
#else
#if defined(ARCH_X86_64)
return ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ERR] & 0x2;
#else
#error "Unsupported architecture"
#endif
#endif
}
} // namespace Common
18 changes: 18 additions & 0 deletions src/common/signal_context.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/types.h"

namespace Common {

void* GetXmmPointer(void* ctx, u8 index);

void* GetRip(void* ctx);

void IncrementRip(void* ctx, u64 length);

bool IsWriteError(void* ctx);

} // namespace Common
218 changes: 213 additions & 5 deletions src/core/cpu_patches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
#include <set>
#include <Zydis/Zydis.h>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/decoder.h"
#include "common/signal_context.h"
#include "common/types.h"
#include "core/signals.h"
#include "core/tls.h"
Expand All @@ -27,6 +30,16 @@

using namespace Xbyak::util;

#define MAYBE_AVX(OPCODE, ...) \
[&] { \
Cpu cpu; \
if (cpu.has(Cpu::tAVX)) { \
c.v##OPCODE(__VA_ARGS__); \
} else { \
c.OPCODE(__VA_ARGS__); \
} \
}()

namespace Core {

static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) {
Expand Down Expand Up @@ -587,6 +600,114 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe

#endif // __APPLE__

static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
Cpu cpu;
return !cpu.has(Cpu::tSSE4a);
}

static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;

ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER, "operand 0 must be a register");

const auto dst = ZydisToXbyakRegisterOperand(operands[0]);

ASSERT_MSG(dst.isXMM(), "operand 0 must be an XMM register");

Xbyak::Xmm xmm_dst = *reinterpret_cast<const Xbyak::Xmm*>(&dst);

if (immediateForm) {
u8 length = operands[1].imm.value.u & 0x3F;
u8 index = operands[2].imm.value.u & 0x3F;
if (length == 0) {
length = 64;
}

LOG_DEBUG(Core, "Patching immediate form EXTRQ, length: {}, index: {}", length, index);

const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;

// Set rsp to before red zone and save scratch registers
c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);

u64 mask = (1ULL << length) - 1;

// Get lower qword from xmm register
MAYBE_AVX(movq, scratch1, xmm_dst);

if (index != 0) {
c.shr(scratch1, index);
}

// We need to move mask to a register because we can't use all the possible
// immediate values with `and reg, imm32`
c.mov(scratch2, mask);
c.and_(scratch1, scratch2);

// Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't
// care to preserve them
MAYBE_AVX(movq, xmm_dst, scratch1);

c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");

const auto src = ZydisToXbyakRegisterOperand(operands[1]);

ASSERT_MSG(src.isXMM(), "operand 1 must be an XMM register");

Xbyak::Xmm xmm_src = *reinterpret_cast<const Xbyak::Xmm*>(&src);

const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;
const Xbyak::Reg64 mask = rdx;

c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);
c.push(mask);

// Construct the mask out of the length that resides in bottom 6 bits of source xmm
MAYBE_AVX(movq, scratch1, xmm_src);
c.mov(scratch2, scratch1);
c.and_(scratch2, 0x3F);
c.mov(mask, 1);
c.shl(mask, cl);
c.dec(mask);

// Get the shift amount and store it in scratch2
c.shr(scratch1, 8);
c.and_(scratch1, 0x3F);
c.mov(scratch2, scratch1); // cl now contains the shift amount

MAYBE_AVX(movq, scratch1, xmm_dst);
c.shr(scratch1, cl);
c.and_(scratch1, mask);
MAYBE_AVX(movq, xmm_dst, scratch1);

c.pop(mask);
c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
}
}

using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo {
Expand All @@ -608,6 +729,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}},
#endif

{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},

#ifdef __APPLE__
// Patches for instruction sets not supported by Rosetta 2.
// BMI1
Expand Down Expand Up @@ -671,14 +794,23 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {

if (Patches.contains(instruction.mnemonic)) {
const auto& patch_info = Patches.at(instruction.mnemonic);
bool needs_trampoline = patch_info.trampoline;
if (patch_info.filter(operands)) {
auto& patch_gen = module->patch_gen;

if (needs_trampoline && instruction.length < 5) {
// Trampoline is needed but instruction is too short to patch.
// Return false and length to fall back to the illegal instruction handler,
// or to signal to AOT compilation that this instruction should be skipped and
// handled at runtime.
return std::make_pair(false, instruction.length);
}

// Reset state and move to current code position.
patch_gen.reset();
patch_gen.setSize(code - patch_gen.getCode());

if (patch_info.trampoline) {
if (needs_trampoline) {
auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr();

Expand Down Expand Up @@ -714,6 +846,78 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
return std::make_pair(false, instruction.length);
}

#if defined(ARCH_X86_64)

static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);

switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_EXTRQ: {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_ERROR(Core, "EXTRQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");

const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;

const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);

u64 lowQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));

u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));

u64 mask = lowQWordSrc & 0x3F;
mask = (1ULL << mask) - 1;

u64 shift = (lowQWordSrc >> 8) & 0x3F;

lowQWordDst >>= shift;
lowQWordDst &= mask;

memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));

Common::IncrementRip(ctx, instruction.length);

return true;
}
break;
}
default: {
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
return false;
}
}

UNREACHABLE();
}
#elif defined(ARCH_ARM64)
// These functions shouldn't be needed for ARM as it will use a JIT so there's no need to patch
// instructions.
static bool TryExecuteIllegalInstruction(void*, void*) {
return false;
}
#else
#error "Unsupported architecture"
#endif

static bool TryPatchJit(void* code_address) {
auto* code = static_cast<u8*>(code_address);
auto* module = GetModule(code);
Expand Down Expand Up @@ -746,12 +950,16 @@ static void TryPatchAot(void* code_address, u64 code_size) {
}
}

static bool PatchesAccessViolationHandler(void* code_address, void* fault_address, bool is_write) {
return TryPatchJit(code_address);
static bool PatchesAccessViolationHandler(void* context, void* /* fault_address */) {
return TryPatchJit(Common::GetRip(context));
}

static bool PatchesIllegalInstructionHandler(void* code_address) {
return TryPatchJit(code_address);
static bool PatchesIllegalInstructionHandler(void* context) {
void* code_address = Common::GetRip(context);
if (!TryPatchJit(code_address)) {
return TryExecuteIllegalInstruction(context, code_address);
}
return true;
}

static void PatchesInit() {
Expand Down
Loading

0 comments on commit 75ce774

Please sign in to comment.