From 5ce4b7ea83ca1dbbe29ba8c1bd314b2204d250f9 Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Tue, 14 Mar 2023 17:15:55 +0100 Subject: [PATCH] NanoMIPS: NMLoadStoreMultiple add reg gap support We're handling the situation where the instruction sequence is regular, except for one instruction having a "wrong" Rt register number. A sequence like that is optimizable if the register with the expected register number is available. In that case, we're emitting one additional move instruction after lwm/swm. --- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 181 ++++++++++-------- .../nanomips/loadstoremultiple_reg_gap.mir | 169 ++++++++++++++++ 2 files changed, 272 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp index 89d4e5e386c3e9..2edb090192cebc 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -35,8 +35,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned Rs; int64_t Offset; MachineBasicBlock *MBB; + MachineInstr *MI; LSIns(MachineInstr *MI) { + this->MI = MI; MBB = MI->getParent(); Rt = MI->getOperand(0).getReg().id(); Rs = MI->getOperand(1).getReg().id(); @@ -45,6 +47,12 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { }; using InstrList = SmallVector; using MBBIter = MachineBasicBlock::iterator; + struct Candidate { + InstrList Sequence; + size_t GapSize; + bool Move = false; + }; + using CandidateList = SmallVector; static char ID; const MipsSubtarget *STI; const TargetInstrInfo *TII; @@ -65,9 +73,11 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned getRegNo(unsigned Reg); bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList); bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize); + size_t &CurrSeqSize, 
bool &RegGap); bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad); + void findCandidatesForOptimization(InstrList &LoadStoreList, + CandidateList &Candidates); }; } // namespace @@ -125,6 +135,53 @@ void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList, std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions); } +void NMLoadStoreMultipleOpt::findCandidatesForOptimization( + InstrList &LoadStoreList, CandidateList &Candidates) { + InstrList Sequence; + size_t GapSize = 0, SeqSize = 0; + bool RegGap = false; + + auto clearSeqence = [&Sequence, &GapSize, &SeqSize, &RegGap]() { + Sequence.clear(); + GapSize = 0; + SeqSize = 0; + RegGap = false; + }; + + for (auto &MI : LoadStoreList) { + // Sequences cannot be longer than 8 instructions. + if (SeqSize == 8) { + Candidates.push_back({Sequence, GapSize}); + clearSeqence(); + } + // When starting a new sequence, there's no need to do any checks. + if (Sequence.empty()) { + Sequence.push_back(MI); + SeqSize = 1; + continue; + } + + if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize, RegGap)) { + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); + clearSeqence(); + } + + Sequence.push_back(MI); + SeqSize++; + + if (RegGap) { + Candidates.push_back({Sequence, GapSize, true}); + clearSeqence(); + } + } + + // Save the last valid sequence for this list. At least 2 instructions are + // necessary for a valid sequence. + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); +} + // All instruction in the seqence should have the same Rs register, and // different Rt register. 
bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, @@ -175,43 +232,52 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize) { + size_t &CurrSeqSize, + bool &RegGap) { unsigned PrevRtNo = getRegNo(Prev.Rt); unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0; Register DesiredRtReg = RC.getRegister(DesiredRtNo); if (Next.Offset == Prev.Offset + 4) { + if (Next.Rt == DesiredRtReg) + return true; + // Next.Rt != DesiredRtReg // GAP, but offset ok // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 16(a4) - if (Next.Rt != DesiredRtReg) { - // TODO + // For now, an instruction like lw a3, 16(a4) interrupts the sequence. + if (CurrSeqSize < 2) return false; - } else { - return true; - } - } else { + + assert(Register::isPhysicalRegister(DesiredRtNo) && + "Desired register is not physical!"); + if (MachineBasicBlock::LQR_Dead != + Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) + return false; + + RegGap = true; + return true; + } + // Next.Offset != Prev.Offset + 4 + bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; + unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); + if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && + Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { // "full" GAP // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 20(a4) - bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; - unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); - if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && - Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { - LivePhysRegs LiveRegs(*TRI); - computeLiveIns(LiveRegs, *Prev.MBB); - for (size_t i = 0; i < Gap; i++) { - assert(Register::isPhysicalRegister(DesiredRtNo + i) && - "Desired register is not physical!"); - if (!LiveRegs.available(*MRI, (DesiredRtReg))) - return false; - DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); - } - GapSize += Gap; - 
CurrSeqSize += Gap; - return true; + for (size_t i = 0; i < Gap; i++) { + assert(Register::isPhysicalRegister(DesiredRtNo + i) && + "Desired register is not physical!"); + if (MachineBasicBlock::LQR_Dead != + Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) + return false; + DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); } + GapSize += Gap; + CurrSeqSize += Gap; + return true; } return false; } @@ -219,10 +285,11 @@ bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad) { bool Modified = false; - struct Candidate { - InstrList Sequence; - size_t GapSize; - }; + + // TODO: Consider allowing interspersed arithmetic/logical operations in + // load/store sequences to reduce sensitivity to instruction ordering. Note + // that proper scheduling models will alter instruction order, increasing + // mixed memory and compute operations. Dependency checks will be required. InstrList SequenceToSort; SmallVector SequenceList; for (auto &MI : MBB) { @@ -239,59 +306,11 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, } } - SmallVector Candidates; + CandidateList Candidates; InstrList Sequence; - size_t GapSize = 0; - size_t SeqSize = 0; for (size_t i = 0; i < SequenceList.size(); i++) { sortLoadStoreList(SequenceList[i], IsLoad); - for (auto &MI : SequenceList[i]) { - // Sequences cannot be longer than 8 instructions. - if (SeqSize == 8) { - Candidates.push_back({Sequence, GapSize}); - Sequence.clear(); - GapSize = 0; - SeqSize = 0; - } - // When starting a new sequence, there's no need to do any checks. 
- if (Sequence.empty()) { - Sequence.push_back(MI); - SeqSize = 1; - continue; - } - - if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) { - if (SeqSize > 1) - Candidates.push_back({Sequence, GapSize}); - Sequence.clear(); - GapSize = 0; - SeqSize = 0; - } - - Sequence.push_back(MI); - SeqSize++; - continue; - } - - // At least 2 instructions are neccessary for a valid sequence. - if (SeqSize > 1) { - Candidates.push_back({Sequence, GapSize}); - SeqSize++; - } - - // Sequence has either ended or has never been started. - if (!Sequence.empty()) { - Sequence.clear(); - SeqSize = 0; - GapSize = 0; - } - } - - // Make sure that the last sequence has been added to the Candidates list. - // TODO: Check if needed. - if (SeqSize > 1) { - Candidates.push_back({Sequence, GapSize}); - SeqSize++; + findCandidatesForOptimization(SequenceList[i], Candidates); } for (auto &C : Candidates) { @@ -312,6 +331,12 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, .addImm(Offset) .addImm(Seq.size() + C.GapSize); BMI.cloneMergedMemRefs(Seq); + if (C.Move) { + BuildMI(MBB, std::next(MBBIter(BMI.getInstr())), Base->getDebugLoc(), + TII->get(Mips::MOVE_NM)) + .addReg(Seq.back()->getOperand(0).getReg(), RegState::Define) + .addReg(Seq[Seq.size() - 2]->getOperand(0).getReg() + 1); + } for (auto *MI : Seq) { if (MI != Base) BMI.addReg(MI->getOperand(0).getReg(), diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir new file mode 100644 index 00000000000000..76a7e13fa63e11 --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir @@ -0,0 +1,169 @@ + +# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ +# RUN: %s -o - | FileCheck %s + +# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 4 +# CHECK-NEXT: $a5_nm = MOVE_NM $a4_nm +--- | + ; ModuleID = '../llvm-project/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple.ll' + 
source_filename = "../llvm-project/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple.ll" + target datalayout = "e-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128" + target triple = "nanomips" + + %struct.bar = type { i32, i32, i32 } + + define void @test4(i32 %n, ...) { + call void asm sideeffect "", ""() + ret void + } + + define void @square(%struct.bar* %ints) { + %a1 = bitcast %struct.bar* %ints to i32* + %1 = load i32, i32* %a1, align 4 + %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 + %2 = load i32, i32* %b, align 4 + %add = add nsw i32 %2, %1 + %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 + store i32 %add, i32* %c, align 4 + ret void + } + +... +--- +name: test4 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$a1_nm', virtual-reg: '' } + - { reg: '$a2_nm', virtual-reg: '' } + - { reg: '$a3_nm', virtual-reg: '' } + - { reg: '$a4_nm', virtual-reg: '' } + - { reg: '$a5_nm', virtual-reg: '' } + - { reg: '$a6_nm', virtual-reg: '' } + - { reg: '$a7_nm', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: + - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, + 
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: [] 
+callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a5_nm, $a6_nm, $a7_nm + + SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm + CFI_INSTRUCTION def_cfa_offset 32 + SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) + SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32)) + SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8) + SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) + SWs9_NM killed renamable $a5_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) + SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) + INLINEASM &"", 1 /* sideeffect attdialect */ + RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm + +... +--- +name: square +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$a0_nm', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $a0_nm + + renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) + renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) + renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm + SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: 
(store (s32) into %ir.c) + PseudoReturnNM undef $ra_nm + +...