From 3590ad90996411ee6bb7092516dd72965dbb668d Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 13 Mar 2023 05:10:34 +0100 Subject: [PATCH] PPCRec: Implement MFCR and MTCRF --- .../Recompiler/BackendX64/BackendX64.cpp | 33 - .../Recompiler/BackendX64/BackendX64FPU.cpp | 36 - src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 17 +- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 38 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 32 +- .../Recompiler/IML/IMLInstruction.cpp | 41 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 18 - .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 615 +----------------- .../IML/IMLRegisterAllocatorRanges.cpp | 1 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 15 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 73 +-- .../Recompiler/PPCRecompilerImlGen.cpp | 62 +- 12 files changed, 60 insertions(+), 921 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index dfb94f278..e32d1ea8e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -680,31 +680,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) - { - DEBUG_BREAK; - //uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; - //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - //for(sint32 f=0; f<32; f++) - //{ - // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - // x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - //} - } - else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - DEBUG_BREAK; - //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; - //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - //for (sint32 f = 0; f < 32; f++) - //{ - // if(((crBitMask >> f) & 1) == 0) - // continue; - // x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - // x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - // x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - //} - } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); @@ -1582,14 +1557,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { // no op } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) { if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 8db27e41e..cff46a2d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -34,42 +34,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR) { // load GQR diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 0f1a0803b..b58fdfa8d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -4,24 +4,13 @@ #include "IMLSegment.h" // analyzer -struct PPCRecCRTracking_t -{ - uint32 readCRBits; - uint32 writtenCRBits; -}; - bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); -bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); // optimizer passes -// todo - rename -//bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -//bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); // debug -void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 1b348c4cf..77403e1b8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -52,40 +52,4 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) } } return false; -} - -/* -* Returns true if the instruction can overwrite CR (depending on value of ->crRegister) -*/ -bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - return true; - - // new instructions - if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) - return true; - - - return false; -} +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index a6b4925c8..d295f0aa8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -60,7 +60,7 @@ std::string IMLDebug_GetRegName(IMLReg r) regName.append("r"); break; default: - __debugbreak(); + DEBUG_BREAK; } regName.append(fmt::format("{}", regId)); return regName; @@ -417,36 +417,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt("MACRO ukn operation {}", inst.operation); } } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME) - { - strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR.GetRegID(), inst.op_r_name.name); - if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if (inst.type == PPCREC_IML_TYPE_FPR_NAME_R) - { - strOutput.addFmt("name_{} (", inst.op_r_name.name); - if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.addFmt(") = {}", IMLDebug_GetRegName(inst.op_r_name.regR)); - } else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) { strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index d50ed1052..f2476e612 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -56,12 +56,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation != PPCREC_IML_OP_OR && operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these - if (operation == PPCREC_IML_OP_MTCRF) - { - // operand register is read only - registersUsed->readGPR1 = op_r_immS32.regR; - } - else if (operation == PPCREC_IML_OP_LEFT_ROTATE) + if (operation == PPCREC_IML_OP_LEFT_ROTATE) { // operand register is read and write registersUsed->readGPR1 = op_r_immS32.regR; @@ -221,16 +216,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - // fpr operation - registersUsed->writtenFPR1 = op_r_name.regR; - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - // fpr operation - registersUsed->readFPR1 = op_r_name.regR; - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { // fpr load operation @@ -636,14 +621,6 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); @@ -766,14 +743,6 @@ void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegist { ; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); @@ -885,14 +854,6 @@ void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegist { ; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 8b49cd22e..817fef190 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -125,8 +125,6 @@ enum PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 - PPCREC_IML_OP_MFCR, // copy cr to gpr - PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -253,8 +251,6 @@ enum PPCREC_IML_TYPE_CONDITIONAL_R_S32, // FPR - PPCREC_IML_TYPE_FPR_R_NAME, // name = f* - PPCREC_IML_TYPE_FPR_NAME_R, // f* = name PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) @@ -412,20 +408,6 @@ struct IMLUsedRegisters F(writtenFPR1, true); } - //bool HasSameBaseFPRRegId(IMLRegID regId) const - //{ - // if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) - // return true; - // if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) - // return true; - // if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) - // return true; - // if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) - // return true; - // if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) - // return true; - // return false; - //} }; struct IMLInstruction diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index b9449c949..cdf922ce7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,492 +6,11 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -//bool _RegExceedsFPRSpace(IMLReg r) -//{ -// if (r.IsInvalid()) -// return false; -// if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) -// return true; -// return false; -//} - IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); } -//bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -//{ -// // only xmm0 to xmm14 may be used, xmm15 is reserved -// // this method will reduce the number of fpr registers used -// // inefficient algorithm for optimizing away excess registers -// // we simply load, use and store excess registers into other unused registers when we need to -// // first we remove all name load and store instructions that involve out-of-bounds registers -// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) -// { -// size_t imlIndex = 0; -// while( imlIndex < segIt->imlList.size() ) -// { -// IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; -// if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) -// { -// if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) -// { -// imlInstructionItr.make_no_op(); -// } -// } -// imlIndex++; -// } -// } -// // replace registers -// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) -// { -// size_t imlIndex = 0; -// while( imlIndex < segIt->imlList.size() ) -// { -// IMLUsedRegisters registersUsed; -// while( true ) -// { -// segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); -// if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) -// { -// // get index of register to replace -// sint32 fprToReplace = -1; -// if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) -// fprToReplace = registersUsed.readFPR1.GetRegID(); -// else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) -// fprToReplace = registersUsed.readFPR2.GetRegID(); -// else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) -// fprToReplace = registersUsed.readFPR3.GetRegID(); -// else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) -// fprToReplace = registersUsed.readFPR4.GetRegID(); -// else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) -// fprToReplace = registersUsed.writtenFPR1.GetRegID(); -// if (fprToReplace >= 0) -// { -// // generate mask of useable registers -// uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 -// if (registersUsed.readFPR1.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); -// if (registersUsed.readFPR2.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); -// if (registersUsed.readFPR3.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); -// if (registersUsed.readFPR4.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); -// if (registersUsed.writtenFPR1.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); -// // get highest unused register index (0-6 range) -// sint32 unusedRegisterIndex = -1; -// for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) -// { -// if (useableRegisterMask & (1 << f)) -// { -// unusedRegisterIndex = f; -// } -// } -// if (unusedRegisterIndex == -1) -// assert_dbg(); -// // determine if the placeholder register is actually used (if not we must not load/store it) -// uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// bool replacedRegisterIsUsed = true; -// if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) -// { -// replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; -// } -// // replace registers that are out of range -// segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); -// // add load/store name after instruction -// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); -// // add load/store before current instruction -// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); -// // name_unusedRegister = unusedRegister -// IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// if (replacedRegisterIsUsed) -// { -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// } -// else -// imlInstructionItr->make_no_op(); -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; -// // name_gprToReplace = unusedRegister -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; -// // unusedRegister = name_unusedRegister -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// if (replacedRegisterIsUsed) -// { -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// } -// else -// imlInstructionItr->make_no_op(); -// } -// } -// else -// break; -// } -// imlIndex++; -// } -// } -// return true; -//} -// -//typedef struct -//{ -// bool isActive; -// uint32 virtualReg; -// sint32 lastUseIndex; -//}ppcRecRegisterMapping_t; -// -//typedef struct -//{ -// ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; -// sint32 ppcRegToMapping[64]; -// sint32 currentUseIndex; -//}ppcRecManageRegisters_t; -// -//ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) -//{ -// // find free register -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx->currentMapping[i].isActive == false) -// { -// rCtx->currentMapping[i].isActive = true; -// rCtx->currentMapping[i].virtualReg = -1; -// rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; -// return rCtx->currentMapping + i; -// } -// } -// // all registers are used -// return nullptr; -//} -// -//ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) -//{ -// // find unloadable register (with lowest lastUseIndex) -// sint32 unloadIndex = -1; -// sint32 unloadIndexLastUse = 0x7FFFFFFF; -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx->currentMapping[i].isActive == false) -// continue; -// if( (unloadLockedMask&(1<currentMapping[i].virtualReg; -// bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); -// if (isReserved) -// continue; -// if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) -// { -// unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; -// unloadIndex = i; -// } -// } -// cemu_assert(unloadIndex != -1); -// return rCtx->currentMapping + unloadIndex; -//} -// -//bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) -//{ -// ppcRecManageRegisters_t rCtx = { 0 }; -// for (sint32 i = 0; i < 64; i++) -// rCtx.ppcRegToMapping[i] = -1; -// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; -// size_t idx = 0; -// sint32 currentUseIndex = 0; -// IMLUsedRegisters registersUsed; -// while (idx < imlSegment->imlList.size()) -// { -// IMLInstruction& idxInst = imlSegment->imlList[idx]; -// if (idxInst.IsSuffixInstruction()) -// break; -// idxInst.CheckRegisterUsage(®istersUsed); -// IMLReg fprMatch[4]; -// IMLReg fprReplace[4]; -// fprMatch[0] = IMLREG_INVALID; -// fprMatch[1] = IMLREG_INVALID; -// fprMatch[2] = IMLREG_INVALID; -// fprMatch[3] = IMLREG_INVALID; -// fprReplace[0] = IMLREG_INVALID; -// fprReplace[1] = IMLREG_INVALID; -// fprReplace[2] = IMLREG_INVALID; -// fprReplace[3] = IMLREG_INVALID; -// // generate a mask of registers that we may not free -// sint32 numReplacedOperands = 0; -// uint32 unloadLockedMask = 0; -// for (sint32 f = 0; f < 5; f++) -// { -// IMLReg virtualFpr; -// if (f == 0) -// virtualFpr = registersUsed.readFPR1; -// else if (f == 1) -// virtualFpr = registersUsed.readFPR2; -// else if (f == 2) -// virtualFpr = registersUsed.readFPR3; -// else if (f == 3) -// virtualFpr = registersUsed.readFPR4; -// else if (f == 4) -// virtualFpr = registersUsed.writtenFPR1; -// if(virtualFpr.IsInvalid()) -// continue; -// cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); -// cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); -// cemu_assert_debug(virtualFpr.GetRegID() < 64); -// // check if this virtual FPR is already loaded in any real register -// ppcRecRegisterMapping_t* regMapping; -// if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) -// { -// // not loaded -// // find available register -// while (true) -// { -// regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); -// if (regMapping == NULL) -// { -// // unload least recently used register and try again -// ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); -// // mark as locked -// unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); -// // create unload instruction -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); -// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; -// idx++; -// // update mapping -// unloadRegMapping->isActive = false; -// rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; -// } -// else -// break; -// } -// // create load instruction -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); -// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; -// idx++; -// // update mapping -// regMapping->virtualReg = virtualFpr.GetRegID(); -// rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); -// regMapping->lastUseIndex = rCtx.currentUseIndex; -// rCtx.currentUseIndex++; -// } -// else -// { -// regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; -// regMapping->lastUseIndex = rCtx.currentUseIndex; -// rCtx.currentUseIndex++; -// } -// // replace FPR -// bool entryFound = false; -// for (sint32 t = 0; t < numReplacedOperands; t++) -// { -// if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) -// { -// cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); -// entryFound = true; -// break; -// } -// } -// if (entryFound == false) -// { -// cemu_assert_debug(numReplacedOperands != 4); -// fprMatch[numReplacedOperands] = virtualFpr; -// fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); -// numReplacedOperands++; -// } -// } -// if (numReplacedOperands > 0) -// { -// imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); -// } -// // next -// idx++; -// } -// // count loaded registers -// sint32 numLoadedRegisters = 0; -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx.currentMapping[i].isActive) -// numLoadedRegisters++; -// } -// // store all loaded registers -// if (numLoadedRegisters > 0) -// { -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx.currentMapping[i].isActive == false) -// continue; -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); -// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; -// idx++; -// } -// } -// return true; -//} -// -//bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -//{ -// for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) -// { -// if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) -// return false; -// } -// return true; -//} - - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLReg registerIndex = nameStoreInstruction->op_r_name.regR; - for(size_t i=startIndex; iimlList.size(); i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readGPR1 == registerIndex || registersUsed.readGPR2 == registerIndex || registersUsed.readGPR3 == registerIndex ) - return false; - if (registersUsed.IsBaseGPRWritten(registerIndex)) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID(); - for(size_t i=startIndex; iimlList.size(); i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readFPR1.IsValidAndSameRegID(regId) || registersUsed.readFPR2.IsValidAndSameRegID(regId) || registersUsed.readFPR3.IsValidAndSameRegID(regId) || registersUsed.readFPR4.IsValidAndSameRegID(regId)) - return false; - if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId) ) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded name is never changed - */ -bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLReg regR = nameStoreInstruction->op_r_name.regR; - for(sint32 i=startIndex; i>=0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.IsBaseGPRWritten(regR) ) - { - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) - return true; - return false; - } - } - return false; -} - -sint32 debugCallCounter1 = 0; - -/* - * Returns true if the name is overwritten in the current or any following segments - */ -bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - uint32 name = nameStoreInstruction->op_r_name.name; - for(size_t i=startIndex; iimlList.size(); i++) - { - const IMLInstruction& imlInstruction = imlSegment->imlList[i]; - if(imlInstruction.type == PPCREC_IML_TYPE_R_NAME ) - { - // name is loaded before being written - if (imlInstruction.op_r_name.name == name) - return false; - } - else if(imlInstruction.type == PPCREC_IML_TYPE_NAME_R ) - { - // name is written before being loaded - if (imlInstruction.op_r_name.name == name) - return true; - } - } - if( scanDepth >= 2 ) - return false; - if( imlSegment->nextSegmentIsUncertain ) - return false; - if( imlSegment->nextSegmentBranchTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchNotTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchTaken == nullptr && imlSegment->nextSegmentBranchNotTaken == nullptr) - return false; - - return true; -} - -/* - * Returns true if the loaded FPR name is never changed - */ -bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID(); - for(sint32 i=startIndex; i>=0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId)) - { - if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - return true; - return false; - } - } - // todo: Scan next segment(s) - return false; -} - void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg) { IMLRegID fprIndex = fprReg.GetRegID(); @@ -564,7 +83,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI * Keeps denormals and other special float values intact * Slightly improves performance */ -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) +void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { @@ -648,7 +167,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp * Advantages: * Slightly improves performance */ -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) +void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) { for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { @@ -809,133 +328,3 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) } } } - -///* -// * Returns true if registerWrite overwrites any of the registers read by registerRead -// */ -//bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) -//{ -// if (registerWrite->writtenNamedReg1 < 0) -// return false; -// -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) -// return true; -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) -// return true; -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) -// return true; -// return false; -//} - -void _reorderConditionModifyInstructions(IMLSegment* imlSegment) -{ -// IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); -// // last instruction is a conditional branch? -// if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) -// return; -// if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) -// return; -// // get CR bitmask of bit required for conditional jump -// PPCRecCRTracking_t crTracking; -// IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); -// uint32 requiredCRBits = crTracking.readCRBits; -// -// // scan backwards until we find the instruction that sets the CR -// sint32 crSetterInstructionIndex = -1; -// sint32 unsafeInstructionIndex = -1; -// for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) -// { -// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; -// IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); -// if (crTracking.readCRBits != 0) -// return; // dont handle complex cases for now -// if (crTracking.writtenCRBits != 0) -// { -// if ((crTracking.writtenCRBits&requiredCRBits) != 0) -// { -// crSetterInstructionIndex = i; -// break; -// } -// else -// { -// return; // other CR bits overwritten (dont handle complex cases) -// } -// } -// // is safe? (no risk of overwriting x64 eflags) -// if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || -// (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || -// (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || -// (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) -// continue; -// // not safe -// if (unsafeInstructionIndex == -1) -// unsafeInstructionIndex = i; -// } -// if (crSetterInstructionIndex < 0) -// return; -// if (unsafeInstructionIndex < 0) -// return; // no danger of overwriting eflags, don't reorder -// // check if we can move the CR setter instruction to after unsafeInstructionIndex -// PPCRecCRTracking_t crTrackingSetter = crTracking; -// IMLUsedRegisters regTrackingCRSetter; -// imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(®TrackingCRSetter); -// if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) -// return; // we don't handle FPR dependency yet so just ignore FPR instructions -// IMLUsedRegisters registerTracking; -// if (regTrackingCRSetter.writtenNamedReg1 >= 0) -// { -// // CR setter does write GPR -// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) -// { -// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); -// // reads register written by CR setter? -// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®isterTracking, ®TrackingCRSetter)) -// { -// return; // cant move CR setter because of dependency -// } -// // writes register read by CR setter? -// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) -// { -// return; // cant move CR setter because of dependency -// } -// // overwrites register written by CR setter? -// if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) -// return; -// } -// } -// else -// { -// // CR setter does not write GPR -// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) -// { -// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); -// // writes register read by CR setter? -// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) -// { -// return; // cant move CR setter because of dependency -// } -// } -// } -// -// // move CR setter instruction -//#ifdef CEMU_DEBUG_ASSERT -// if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) -// assert_dbg(); -//#endif -// IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); -// memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); -// imlSegment->imlList[crSetterInstructionIndex].make_no_op(); -} - -/* - * Move instructions which update the condition flags closer to the instruction that consumes them - * On x64 this improves performance since we often can avoid storing CR in memory - */ -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) -{ - // check if this segment has a conditional branch - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - _reorderConditionModifyInstructions(segIt); - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 8cdefe251..f722e7cac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -423,3 +423,4 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subr return cost; } + diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 70151422e..bf1868cf8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -71,34 +71,21 @@ struct PPCSegmentRegisterAllocatorInfo_t struct IMLSegment { sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) - sint32 startOffset{}; // offset to first instruction in iml instruction list - sint32 count{}; // number of instructions in segment + sint32 loopDepth{}; uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) uint32 x64Offset{}; // x64 code offset of segment start - uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) // list of intermediate instructions in this segment std::vector imlList; // segment link IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch IMLSegment* nextSegmentBranchTaken{}; bool nextSegmentIsUncertain{}; - sint32 loopDepth{}; std::vector list_prevSegments{}; - // PPC range of segment - uint32 ppcAddrMin{}; - uint32 ppcAddrMax{}; // enterable segments bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true - // PPC FPR use mask - //bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR - // CR use mask - uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) - uint32 crBitsRead{}; // all bits that are read in this segment - uint32 crBitsWritten{}; // bits that are written in this segment // register allocator info PPCSegmentRegisterAllocatorInfo_t raInfo{}; - // segment state API void SetEnterable(uint32 enterAddress); void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 852a30c45..e73be9db0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -245,63 +245,13 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return ppcRecFunc; } -bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenContext) { - // isolate entry points from function flow (enterable segments must not be the target of any other segment) - // this simplifies logic during register allocation - PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // insert name store instructions at the end of each segment but before branch instructions - //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - //{ - // if (segIt->imlList.size() == 0) - // continue; // ignore empty segments - // // analyze segment for register usage - // IMLUsedRegisters registersUsed; - // for (sint32 i = 0; i < segIt->imlList.size(); i++) - // { - // segIt->imlList[i].CheckRegisterUsage(®istersUsed); - // IMLReg accessedTempReg[5]; - // // intermediate FPRs - // accessedTempReg[0] = registersUsed.readFPR1; - // accessedTempReg[1] = registersUsed.readFPR2; - // accessedTempReg[2] = registersUsed.readFPR3; - // accessedTempReg[3] = registersUsed.readFPR4; - // accessedTempReg[4] = registersUsed.writtenFPR1; - // for (sint32 f = 0; f < 5; f++) - // { - // if (accessedTempReg[f].IsInvalid()) - // continue; - // uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; - // if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) - // { - // segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - // } - // } - // } - //} - - // merge certain float load+store patterns (must happen before FPR register remapping) - PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); - // delay byte swapping for certain load+store patterns - PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - - //if (numLoadedFPRRegisters > 0) - //{ - // if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - // { - // return false; - // } - //} - IMLRegisterAllocatorParameters raParam; for (auto& it : ppcImlGenContext.mappedRegs) raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first); - + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); @@ -335,6 +285,23 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); +} + +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +{ + // isolate entry points from function flow (enterable segments must not be the target of any other segment) + // this simplifies logic during register allocation + PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); + + // if GQRs can be predicted, optimize PSQ load/stores + PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); + + // merge certain float load+store patterns (must happen before FPR register remapping) + IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext); + // delay byte swapping for certain load+store patterns + IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); + + PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); @@ -355,7 +322,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun return false; } - // check if the current range got invalidated in the time it took to recompile it + // check if the current range got invalidated during the time it took to recompile it bool isInvalidated = false; for (auto& invRange : PPCRecompilerState.invalidationRanges) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index f474b0156..38a20a24e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -168,7 +168,7 @@ IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IML uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - __debugbreak(); + DEBUG_BREAK; //if( mappedName == PPCREC_NAME_NONE ) //{ // debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); @@ -187,7 +187,7 @@ uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcIml uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - __debugbreak(); + DEBUG_BREAK; //for(uint32 i=0; i<255; i++) //{ // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) @@ -242,14 +242,6 @@ IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) */ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) { - //if( loadNew == false ) - //{ - // uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - // if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - //} - //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } @@ -259,11 +251,6 @@ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, */ IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - //uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - //if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } @@ -434,27 +421,38 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("MFCR: Not implemented\n"); - return false; - - //sint32 rD, rA, rB; - //PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); - //return true; + sint32 rD, rA, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, 0); + for (sint32 i = 0; i < 32; i++) + { + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, i); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); // addition is only allowed between same-format regs + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regD, regD, 1); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regD, regCrBit); + } + return true; } bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("MTCRF: Not implemented\n"); - return false; - - //uint32 rS; - //uint32 crMask; - //PPC_OPC_TEMPL_XFX(opcode, rS, crMask); - //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); - //return true; + uint32 rS; + uint32 crMask; + PPC_OPC_TEMPL_XFX(opcode, rS, crMask); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 crBitMask = ppc_MTCRFMaskToCRBitMask(crMask); + for (sint32 f = 0; f < 32; f++) + { + if(((crBitMask >> f) & 1) == 0) + continue; + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, f); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regS, (31-f)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regCrBit, regTmp, 1); + } + return true; } void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned)