From 208d47cf1bfe796b6024eaa4df879285d9447a88 Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Tue, 24 Jan 2023 10:00:23 +0100 Subject: [PATCH 1/5] Target/Mips: Move LWM/SWM opt to standalone pass LoadStoreMultiple optimization has been moved to the standalone pass so it can be expanded. Potentially, its position in a pipeline could be changed. --- llvm/lib/Target/Mips/CMakeLists.txt | 1 + llvm/lib/Target/Mips/Mips.h | 10 +- llvm/lib/Target/Mips/MipsTargetMachine.cpp | 5 +- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 270 ++++++++++++++++++ .../Mips/NanoMipsLoadStoreOptimizer.cpp | 198 ------------- 5 files changed, 281 insertions(+), 203 deletions(-) create mode 100644 llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt index 2033c17deea1c9..9e01fb340b8489 100644 --- a/llvm/lib/Target/Mips/CMakeLists.txt +++ b/llvm/lib/Target/Mips/CMakeLists.txt @@ -64,6 +64,7 @@ add_llvm_target(MipsCodeGen MipsTargetTransformInfo.cpp MicroMipsSizeReduction.cpp MipsMulMulBugPass.cpp + NanoMipsLoadStoreMultiple.cpp NanoMipsLoadStoreOptimizer.cpp NanoMipsMoveOptimizer.cpp NanoMipsOptimizeJumpTables.cpp diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h index 18d8006089636c..08ad99a41b53cb 100644 --- a/llvm/lib/Target/Mips/Mips.h +++ b/llvm/lib/Target/Mips/Mips.h @@ -46,6 +46,7 @@ FunctionPass *createNanoMipsMoveOptimizerPass(); FunctionPass *createNanoMipsRegisterReAllocationPass(); FunctionPass *createRedundantCopyEliminationPass(); FunctionPass *createNanoMipsCodeGenPreparePass(); +FunctionPass *createNanoMipsLoadStoreMultiplePass(); InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &, MipsSubtarget &, @@ -58,12 +59,13 @@ void initializeMipsDelaySlotFillerPass(PassRegistry &); void initializeMipsMulMulBugFixPass(PassRegistry &); void initializeMipsPostLegalizerCombinerPass(PassRegistry &); void 
initializeMipsPreLegalizerCombinerPass(PassRegistry &); -void initializeNMOptimizeJumpTablesPass (PassRegistry&); +void initializeNMOptimizeJumpTablesPass(PassRegistry &); void initializeNanoMipsRegisterReAllocPass(PassRegistry &); -void initializeRedundantCopyEliminationPass(PassRegistry&); +void initializeRedundantCopyEliminationPass(PassRegistry &); void initializeNanoMipsCodeGenPreparePass(PassRegistry &); -void initializeNMLoadStoreOptPass(PassRegistry&); -void initializeNMMoveOptPass(PassRegistry&); +void initializeNMLoadStoreOptPass(PassRegistry &); +void initializeNMMoveOptPass(PassRegistry &); +void initializeNMLoadStoreMultipleOptPass(PassRegistry &); } // namespace llvm #endif diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 7573fe14c4bd9b..2c7e6d22387fe9 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() { initializeMipsPostLegalizerCombinerPass(*PR); initializeMipsMulMulBugFixPass(*PR); initializeMipsDAGToDAGISelPass(*PR); + initializeNMLoadStoreMultipleOptPass(*PR); initializeRedundantCopyEliminationPass(*PR); initializeNMLoadStoreOptPass(*PR); initializeNMMoveOptPass(*PR); @@ -319,8 +320,10 @@ std::unique_ptr MipsPassConfig::getCSEConfig() const { } void MipsPassConfig::addPreSched2() { - if (getMipsSubtarget().hasNanoMips() && getOptLevel() != CodeGenOpt::None) + if (getMipsSubtarget().hasNanoMips() && getOptLevel() != CodeGenOpt::None) { addPass(createNanoMipsLoadStoreOptimizerPass()); + addPass(createNanoMipsLoadStoreMultiplePass()); + } } void MipsPassConfig::addIRPasses() { diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp new file mode 100644 index 00000000000000..3e0227a7b515f0 --- /dev/null +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -0,0 +1,270 @@ +//===- 
NanoMipsLoadStoreMultiple.cpp - nanoMIPS load / store opt. pass +//--------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains a pass that performs load / store related peephole +/// optimizations. This pass should be run after register allocation. +// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +#include + +using namespace llvm; + +#define DEBUG_TYPE "nanomips-lwm-swm" +#define NM_LOAD_STORE_OPT_NAME "nanoMIPS load/store multiple optimization pass" + +static cl::opt DisableNMLoadStoreMultiple( + "disable-nm-lwm-swm", cl::Hidden, cl::init(false), + cl::desc("Disable NanoMips load/store multiple optimizations")); + +namespace { +struct NMLoadStoreMultipleOpt : public MachineFunctionPass { + struct LSIns { + unsigned Rt; + unsigned Rs; + int64_t Offset; + + LSIns(MachineInstr *MI) { + Rt = MI->getOperand(0).getReg().id(); + Rs = MI->getOperand(1).getReg().id(); + Offset = MI->getOperand(2).getImm(); + } + }; + using InstrList = SmallVector; + using MBBIter = MachineBasicBlock::iterator; + static char ID; + const MipsSubtarget *STI; + const TargetInstrInfo *TII; + MCRegisterClass RC = MipsMCRegisterClasses[Mips::GPRNM32RegClassID]; + DenseMap RegToIndexMap; + + NMLoadStoreMultipleOpt() : MachineFunctionPass(ID) { + // Initialize RegToIndexMap. 
+ for (unsigned I = 0; I < RC.getNumRegs(); I++) { + unsigned R = RC.begin()[I]; + RegToIndexMap[R] = I; + } + } + StringRef getPassName() const override { return NM_LOAD_STORE_OPT_NAME; } + bool runOnMachineFunction(MachineFunction &Fn) override; + unsigned getRegNo(unsigned Reg); + bool isValidLoadStore(MachineInstr &MI, bool IsLoad); + bool isValidNextLoadStore(LSIns Prev, LSIns Next, bool &IsAscending); + bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); +}; +} // namespace + +char NMLoadStoreMultipleOpt::ID = 0; + +bool NMLoadStoreMultipleOpt::runOnMachineFunction(MachineFunction &Fn) { + if (DisableNMLoadStoreMultiple) + return false; + STI = &static_cast(Fn.getSubtarget()); + TII = STI->getInstrInfo(); + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= generateLoadStoreMultiple(MBB, /*IsLoad=*/false); + Modified |= generateLoadStoreMultiple(MBB, /*IsLoad=*/true); + } + + return Modified; +} + +unsigned NMLoadStoreMultipleOpt::getRegNo(unsigned Reg) { + auto I = RegToIndexMap.find(Reg); + + // Invalid register index. + if (I == RegToIndexMap.end()) + return RC.getNumRegs(); + + return I->second; +} + +bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad) { + unsigned Opcode = MI.getOpcode(); + // Make sure the instruction doesn't have any atomic, volatile or + // otherwise strictly ordered accesses. + for (auto &MMO : MI.memoperands()) + if (MMO->isAtomic() || !MMO->isUnordered()) + return false; + + Register Rt, Rs; + if (IsLoad) { + // TODO: Handle unaligned loads and stores. + if (Opcode == Mips::LW_NM || Opcode == Mips::LWs9_NM) { + // TODO: Rt and Rs can be equal, but only if that is the last load of + // the sequence. 
+ Register Rt = MI.getOperand(0).getReg(); + Register Rs = MI.getOperand(1).getReg(); + if (Rt != Rs) + return true; + } + } else { + if (Opcode == Mips::SW_NM || Opcode == Mips::SWs9_NM) + return true; + } + return false; +} + +bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, + bool &IsAscending) { + if (Prev.Rs != Next.Rs) + return false; + + unsigned PrevRtNo = getRegNo(Prev.Rt); + if (Next.Offset == Prev.Offset + 4) { + // Zero register stores are a special case that does not require + // consequent $rt registers, but instead requires all $rt + // registers to be $zero. + // After processing $31, sequence continues from $16. + unsigned DesiredRtNo = + PrevRtNo != 0 ? (PrevRtNo == 31 ? 16 : PrevRtNo + 1) : 0; + if (Next.Rt != RC.getRegister(DesiredRtNo)) + return false; + + IsAscending = true; + return true; + } else if (Next.Offset == Prev.Offset - 4) { + // In case the previous register was $16 and the sequence happens to + // to go backwards, the next register can be either $15 or $31. + if (PrevRtNo == 16) { + if (Next.Rt != RC.getRegister(PrevRtNo - 1) && + Next.Rt != RC.getRegister(31)) + return false; + } else { + // Zero register stores are a special case that does not require + // consequent $rt registers, but instead requires all $rt + // registers to be $zero. + unsigned DesiredRtNo = PrevRtNo != 0 ? PrevRtNo - 1 : 0; + if (Next.Rt != RC.getRegister(DesiredRtNo)) + return false; + } + IsAscending = false; + return true; + } + return false; +} + +bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, + bool IsLoad) { + struct Candidate { + SmallVector Sequence; + bool IsAscending; + }; + bool Modified = false; + SmallVector Candidates; + SmallVector Sequence; + bool IsAscending; + for (auto &MI : MBB) { + // CFI and debug instructions don't break the sequence. 
+ if (MI.isCFIInstruction() || MI.isDebugInstr()) + continue; + + if (isValidLoadStore(MI, IsLoad)) { + // Sequences cannot be longer than 8 instructions. + if (Sequence.size() == 8) { + Candidates.push_back({Sequence, IsAscending}); + Sequence.clear(); + } + // When starting a new sequence, there's no need to do any checks. + if (Sequence.empty()) { + Sequence.push_back(&MI); + continue; + } + bool ShouldStartNewSequence = false; + bool IsNextAscending; + if (isValidNextLoadStore(Sequence.back(), &MI, IsNextAscending)) { + if (Sequence.size() > 1) { + // In case the next instruction is going in the opposite direction + // from the sequence, start a new sequence. + if (IsAscending != IsNextAscending) { + ShouldStartNewSequence = true; + } + } else { + IsAscending = IsNextAscending; + } + } else { + // In case the next instruction is not a valid successor, save the + // current sequence (if we have one) and create a new sequence. + ShouldStartNewSequence = true; + } + + if (ShouldStartNewSequence) { + if (Sequence.size() > 1) + Candidates.push_back({Sequence, IsAscending}); + Sequence.clear(); + } + + Sequence.push_back(&MI); + continue; + } + + // At least 2 instructions are neccessary for a valid sequence. + if (Sequence.size() > 1) + Candidates.push_back({Sequence, IsAscending}); + + // Sequence has either ended or has never been started. + if (!Sequence.empty()) + Sequence.clear(); + } + + // Make sure that the last sequence has been added to the Candidates list. + if (Sequence.size() > 1) + Candidates.push_back({Sequence, IsAscending}); + + // Separate sequence to avoid removing instructions from MBB while iterating. + for (auto &C : Candidates) { + auto &Seq = C.Sequence; + + assert(Seq.size() > 1 && Seq.size() < 9); + + auto *Base = C.IsAscending ? Seq.front() : Seq.back(); + int64_t Offset = Base->getOperand(2).getImm(); + // Sequence cannot be merged, if the offset is out of range. 
+ if (!isInt<9>(Offset)) + continue; + + auto InsertBefore = std::next(MBBIter(Base)); + unsigned Opcode = IsLoad ? Mips::LWM_NM : Mips::SWM_NM; + auto BMI = + BuildMI(MBB, InsertBefore, Base->getDebugLoc(), TII->get(Opcode)) + .addReg(Base->getOperand(0).getReg(), IsLoad ? RegState::Define : 0) + .addReg(Base->getOperand(1).getReg()) + .addImm(Offset) + .addImm(Seq.size()); + BMI.cloneMergedMemRefs(Seq); + for (auto *MI : Seq) { + if (MI != Base) + BMI.addReg(MI->getOperand(0).getReg(), + IsLoad ? RegState::ImplicitDefine : RegState::Implicit); + MBB.erase(MI); + } + + Modified = true; + } + return Modified; +} + +INITIALIZE_PASS(NMLoadStoreMultipleOpt, DEBUG_TYPE, NM_LOAD_STORE_OPT_NAME, + false, false) + +namespace llvm { +FunctionPass *createNanoMipsLoadStoreMultiplePass() { + return new NMLoadStoreMultipleOpt(); +} +} // namespace llvm \ No newline at end of file diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreOptimizer.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreOptimizer.cpp index 7475c239d4b3ff..56a2c42c751828 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreOptimizer.cpp @@ -27,27 +27,12 @@ static cl::opt DisableNMSaveRestore("disable-nm-save-restore", cl::Hidden, cl::init(false), cl::desc("Disable NanoMips save/restore optimizations")); -static cl::opt -DisableNMLoadStoreMultiple("disable-nm-lwm-swm", cl::Hidden, cl::init(false), - cl::desc("Disable NanoMips load/store multiple optimizations")); - static cl::opt DisableNMPCRelOpt("disable-nm-pcrel-opt", cl::Hidden, cl::init(false), cl::desc("Disable NanoMips PC-relative addressing optimization")); namespace { struct NMLoadStoreOpt : public MachineFunctionPass { - struct LSIns { - unsigned Rt; - unsigned Rs; - int64_t Offset; - - LSIns(MachineInstr *MI) { - Rt = MI->getOperand(0).getReg().id(); - Rs = MI->getOperand(1).getReg().id(); - Offset = MI->getOperand(2).getImm(); - } - }; using InstrList = SmallVector; using MBBIter = 
MachineBasicBlock::iterator; static char ID; @@ -73,10 +58,6 @@ struct NMLoadStoreOpt : public MachineFunctionPass { bool isRASaved(const InstrList &StoreSequence); bool isValidSaveRestore16Offset(int64_t Offset); bool generateSaveOrRestore(MachineBasicBlock &MBB, bool IsRestore); - unsigned getRegNo(unsigned Reg); - bool isValidLoadStore(MachineInstr &MI, bool IsLoad); - bool isValidNextLoadStore(LSIns Prev, LSIns Next, bool &IsAscending); - bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); bool generatePCRelative(MachineBasicBlock &MBB); }; } // namespace @@ -94,10 +75,6 @@ bool NMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { Modified |= generateSaveOrRestore(MBB, /*IsRestore=*/false); Modified |= generateSaveOrRestore(MBB, /*IsRestore=*/true); } - if (!DisableNMLoadStoreMultiple && Fn.getFunction().hasOptSize()) { - Modified |= generateLoadStoreMultiple(MBB, /*IsLoad=*/false); - Modified |= generateLoadStoreMultiple(MBB, /*IsLoad=*/true); - } if (!DisableNMPCRelOpt) Modified |= generatePCRelative(MBB); } @@ -465,181 +442,6 @@ bool NMLoadStoreOpt::generateSaveOrRestore(MachineBasicBlock &MBB, return false; } -unsigned NMLoadStoreOpt::getRegNo(unsigned Reg) { - for (unsigned I = 0; I < RC.getNumRegs(); I++) { - unsigned R = RC.begin()[I]; - if (R == Reg) - return I; - } - // Invalid register index. - return RC.getNumRegs(); -} - -bool NMLoadStoreOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad) { - unsigned Opcode = MI.getOpcode(); - - // Make sure the instruction doesn't have any atomic, volatile or - // otherwise strictly ordered accesses. - for (auto &MMO : MI.memoperands()) - if (MMO->isAtomic() || !MMO->isUnordered()) - return false; - - if (IsLoad) { - // TODO: Handle unaligned loads and stores. - if (Opcode == Mips::LW_NM || Opcode == Mips::LWs9_NM) { - // TODO: Rt and Rs can be equal, but only if that is the last load of - // the sequence. 
- Register Rt = MI.getOperand(0).getReg(); - Register Rs = MI.getOperand(1).getReg(); - if (Rt != Rs) - return true; - } - } else { - if (Opcode == Mips::SW_NM || Opcode == Mips::SWs9_NM) - return true; - } - return false; -} - -bool NMLoadStoreOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, - bool &IsAscending) { - if (Prev.Rs != Next.Rs) - return false; - - unsigned PrevRtNo = getRegNo(Prev.Rt); - if (Next.Offset == Prev.Offset + 4) { - // Zero register stores are a special case that does not require - // consequent $rt registers, but instead requires all $rt - // registers to be $zero. - // After processing $31, sequence continues from $16. - unsigned DesiredRtNo = - PrevRtNo != 0 ? (PrevRtNo == 31 ? 16 : PrevRtNo + 1) : 0; - if (Next.Rt != RC.getRegister(DesiredRtNo)) - return false; - - IsAscending = true; - return true; - } else if (Next.Offset == Prev.Offset - 4) { - // In case the previous register was $16 and the sequence happens to - // to go backwards, the next register can be either $15 or $31. - if (PrevRtNo == 16) { - if (Next.Rt != RC.getRegister(PrevRtNo - 1) && - Next.Rt != RC.getRegister(31)) - return false; - } else { - // Zero register stores are a special case that does not require - // consequent $rt registers, but instead requires all $rt - // registers to be $zero. - unsigned DesiredRtNo = PrevRtNo != 0 ? PrevRtNo - 1 : 0; - if (Next.Rt != RC.getRegister(DesiredRtNo)) - return false; - } - IsAscending = false; - return true; - } - return false; -} - -bool NMLoadStoreOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, - bool IsLoad) { - struct Candidate { - SmallVector Sequence; - bool IsAscending; - }; - bool Modified = false; - SmallVector Candidates; - SmallVector Sequence; - bool IsAscending; - for (auto &MI : MBB) { - // CFI and debug instructions don't break the sequence. - if (MI.isCFIInstruction() || MI.isDebugInstr()) - continue; - - if (isValidLoadStore(MI, IsLoad)) { - // Sequences cannot be longer than 8 instructions. 
- if (Sequence.size() == 8) { - Candidates.push_back({Sequence, IsAscending}); - Sequence.clear(); - } - // When starting a new sequence, there's no need to do any checks. - if (Sequence.empty()) { - Sequence.push_back(&MI); - continue; - } - bool ShouldStartNewSequence = false; - bool IsNextAscending; - if (isValidNextLoadStore(Sequence.back(), &MI, IsNextAscending)) { - if (Sequence.size() > 1) { - // In case the next instruction is going in the opposite direction - // from the sequence, start a new sequence. - if (IsAscending != IsNextAscending) { - ShouldStartNewSequence = true; - } - } else { - IsAscending = IsNextAscending; - } - } else { - // In case the next instruction is not a valid successor, save the - // current sequence (if we have one) and create a new sequence. - ShouldStartNewSequence = true; - } - - if (ShouldStartNewSequence) { - if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); - Sequence.clear(); - } - - Sequence.push_back(&MI); - continue; - } - - // At least 2 instructions are neccessary for a valid sequence. - if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); - - // Sequence has either ended or has never been started. - if (!Sequence.empty()) - Sequence.clear(); - } - - // Make sure that the last sequence has been added to the Candidates list. - if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); - - // Separate sequence to avoid removing instructions from MBB while iterating. - for (auto &C : Candidates) { - auto &Seq = C.Sequence; - - assert(Seq.size() > 1 && Seq.size() < 9); - - auto *Base = C.IsAscending ? Seq.front() : Seq.back(); - int64_t Offset = Base->getOperand(2).getImm(); - // Sequence cannot be merged, if the offset is out of range. - if (!isInt<9>(Offset)) - continue; - - auto InsertBefore = std::next(MBBIter(Base)); - unsigned Opcode = IsLoad ? 
Mips::LWM_NM : Mips::SWM_NM; - auto BMI = - BuildMI(MBB, InsertBefore, Base->getDebugLoc(), TII->get(Opcode)) - .addReg(Base->getOperand(0).getReg(), IsLoad ? RegState::Define : 0) - .addReg(Base->getOperand(1).getReg()) - .addImm(Offset) - .addImm(Seq.size()); - BMI.cloneMergedMemRefs(Seq); - for (auto *MI : Seq) { - if (MI != Base) - BMI.addReg(MI->getOperand(0).getReg(), - IsLoad ? RegState::ImplicitDefine : RegState::Implicit); - MBB.erase(MI); - } - - Modified = true; - } - return Modified; -} - // Check if the instruction is lw, sw or addiu with Reg as second operand. static bool isValidUse(MachineInstr *MI, Register Reg) { switch (MI->getOpcode()) { From 3720af866633b0c8fa66c9ad968dd1d260a8b558 Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Tue, 31 Jan 2023 16:01:07 +0100 Subject: [PATCH 2/5] NanoMIPS: Recognize unsorted sequences for LWM/SWM Additional support is added to recognize more sequences. Before, a sequence like this wasn't accepted: lw a1, 4(a0) lw a3, 12(a0) lw a2, 8(a0) Now, when we're sorting instructions by the reg:offset pair internally, the above is accepted. 
--- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 175 ++++++++++-------- .../nanomips/loadstoremultiple_unsorted.mir | 85 +++++++++ 2 files changed, 178 insertions(+), 82 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_unsorted.mir diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp index 3e0227a7b515f0..9289645e2c60e2 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -43,7 +43,7 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { Offset = MI->getOperand(2).getImm(); } }; - using InstrList = SmallVector; + using InstrList = SmallVector; using MBBIter = MachineBasicBlock::iterator; static char ID; const MipsSubtarget *STI; @@ -61,9 +61,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { StringRef getPassName() const override { return NM_LOAD_STORE_OPT_NAME; } bool runOnMachineFunction(MachineFunction &Fn) override; unsigned getRegNo(unsigned Reg); - bool isValidLoadStore(MachineInstr &MI, bool IsLoad); - bool isValidNextLoadStore(LSIns Prev, LSIns Next, bool &IsAscending); + bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList); + bool isValidNextLoadStore(LSIns Prev, LSIns Next); bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); + void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad); }; } // namespace @@ -95,7 +96,36 @@ unsigned NMLoadStoreMultipleOpt::getRegNo(unsigned Reg) { return I->second; } -bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad) { +// Here, we're sorting InstrList to be able to easily recognize sequences that +// are not sorted by the reg-offset pair. We're sorting ascending by register +// number. Later we check if the offsets are in the desired order. The +// exceptions are zero register stores. In that case, the sorting is done by the +// offset. 
+// Currently, the following case is not supported: +// lw a30, 4 (a9) +// lw a31, 8 (a9) +// lw a16, 12(a9) +void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList, + bool IsLoad) { + auto CompareInstructions = [this, IsLoad](MachineInstr *First, + MachineInstr *Second) { + Register FirstReg = First->getOperand(0).getReg(); + Register SecondReg = Second->getOperand(0).getReg(); + unsigned FirstRegNo = getRegNo(FirstReg); + unsigned SecondRegNo = getRegNo(SecondReg); + + // For the zero register stores, sort instructions by the Offset. + if (!IsLoad && FirstRegNo == 0 && SecondRegNo == 0) + return First->getOperand(2).getImm() < Second->getOperand(2).getImm(); + return FirstRegNo < SecondRegNo; + }; + std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions); +} + +// All instruction in the seqence should have the same Rs register, and +// different Rt register. +bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, + InstrList Sequence) { unsigned Opcode = MI.getOpcode(); // Make sure the instruction doesn't have any atomic, volatile or // otherwise strictly ordered accesses. @@ -106,55 +136,46 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad) { Register Rt, Rs; if (IsLoad) { // TODO: Handle unaligned loads and stores. - if (Opcode == Mips::LW_NM || Opcode == Mips::LWs9_NM) { - // TODO: Rt and Rs can be equal, but only if that is the last load of - // the sequence. - Register Rt = MI.getOperand(0).getReg(); - Register Rs = MI.getOperand(1).getReg(); - if (Rt != Rs) - return true; - } + if (Opcode != Mips::LW_NM && Opcode != Mips::LWs9_NM) + return false; + + Rt = MI.getOperand(0).getReg(); + Rs = MI.getOperand(1).getReg(); + + // TODO: Rt and Rs can be equal, but only if that is the last load of + // the sequence. 
+ if (Rt == Rs) + return false; + } else { - if (Opcode == Mips::SW_NM || Opcode == Mips::SWs9_NM) - return true; + if (Opcode != Mips::SW_NM && Opcode != Mips::SWs9_NM) + return false; + Rt = MI.getOperand(0).getReg(); + Rs = MI.getOperand(1).getReg(); } + + if (Sequence.size() > 0) { + auto SeqRs = Sequence.back()->getOperand(1).getReg(); + if (Rs != SeqRs) + return false; + } + auto RtExists = [&Rt](const MachineInstr *I) { + return I->getOperand(0).getReg() == Rt; + }; + auto It = std::find_if(Sequence.begin(), Sequence.end(), RtExists); + // Zero register stores are a special case that does not require consequent + // $rt registers, but instead requires all $rt registers to be $zero. + if (It == Sequence.end() || getRegNo(Rt) == 0) + return true; return false; } -bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, - bool &IsAscending) { - if (Prev.Rs != Next.Rs) - return false; - +bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next) { unsigned PrevRtNo = getRegNo(Prev.Rt); if (Next.Offset == Prev.Offset + 4) { - // Zero register stores are a special case that does not require - // consequent $rt registers, but instead requires all $rt - // registers to be $zero. - // After processing $31, sequence continues from $16. - unsigned DesiredRtNo = - PrevRtNo != 0 ? (PrevRtNo == 31 ? 16 : PrevRtNo + 1) : 0; + unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0; if (Next.Rt != RC.getRegister(DesiredRtNo)) return false; - - IsAscending = true; - return true; - } else if (Next.Offset == Prev.Offset - 4) { - // In case the previous register was $16 and the sequence happens to - // to go backwards, the next register can be either $15 or $31. - if (PrevRtNo == 16) { - if (Next.Rt != RC.getRegister(PrevRtNo - 1) && - Next.Rt != RC.getRegister(31)) - return false; - } else { - // Zero register stores are a special case that does not require - // consequent $rt registers, but instead requires all $rt - // registers to be $zero. 
- unsigned DesiredRtNo = PrevRtNo != 0 ? PrevRtNo - 1 : 0; - if (Next.Rt != RC.getRegister(DesiredRtNo)) - return false; - } - IsAscending = false; return true; } return false; @@ -162,61 +183,53 @@ bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad) { - struct Candidate { - SmallVector Sequence; - bool IsAscending; - }; bool Modified = false; - SmallVector Candidates; - SmallVector Sequence; - bool IsAscending; + + InstrList SequenceToSort; + SmallVector SequenceList; for (auto &MI : MBB) { // CFI and debug instructions don't break the sequence. if (MI.isCFIInstruction() || MI.isDebugInstr()) continue; + if (isValidLoadStore(MI, IsLoad, SequenceToSort)) { + SequenceToSort.push_back(&MI); + continue; + } + if (SequenceToSort.size() > 1) { + SequenceList.push_back(SequenceToSort); + SequenceToSort.clear(); + } + } + + SmallVector Candidates; + InstrList Sequence; - if (isValidLoadStore(MI, IsLoad)) { + for (size_t i = 0; i < SequenceList.size(); i++) { + sortLoadStoreList(SequenceList[i], IsLoad); + for (auto &MI : SequenceList[i]) { // Sequences cannot be longer than 8 instructions. if (Sequence.size() == 8) { - Candidates.push_back({Sequence, IsAscending}); + Candidates.push_back(Sequence); Sequence.clear(); } // When starting a new sequence, there's no need to do any checks. if (Sequence.empty()) { - Sequence.push_back(&MI); + Sequence.push_back(MI); continue; } - bool ShouldStartNewSequence = false; - bool IsNextAscending; - if (isValidNextLoadStore(Sequence.back(), &MI, IsNextAscending)) { - if (Sequence.size() > 1) { - // In case the next instruction is going in the opposite direction - // from the sequence, start a new sequence. 
- if (IsAscending != IsNextAscending) { - ShouldStartNewSequence = true; - } - } else { - IsAscending = IsNextAscending; - } - } else { - // In case the next instruction is not a valid successor, save the - // current sequence (if we have one) and create a new sequence. - ShouldStartNewSequence = true; - } - - if (ShouldStartNewSequence) { + if (!isValidNextLoadStore(Sequence.back(), MI)) { if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); + Candidates.push_back(Sequence); Sequence.clear(); } - Sequence.push_back(&MI); + Sequence.push_back(MI); continue; } // At least 2 instructions are neccessary for a valid sequence. if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); + Candidates.push_back(Sequence); // Sequence has either ended or has never been started. if (!Sequence.empty()) @@ -224,16 +237,14 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, } // Make sure that the last sequence has been added to the Candidates list. + // TODO: Check if needed. if (Sequence.size() > 1) - Candidates.push_back({Sequence, IsAscending}); - - // Separate sequence to avoid removing instructions from MBB while iterating. - for (auto &C : Candidates) { - auto &Seq = C.Sequence; + Candidates.push_back(Sequence); + for (auto &Seq : Candidates) { assert(Seq.size() > 1 && Seq.size() < 9); - auto *Base = C.IsAscending ? Seq.front() : Seq.back(); + auto *Base = Seq.front(); int64_t Offset = Base->getOperand(2).getImm(); // Sequence cannot be merged, if the offset is out of range. 
if (!isInt<9>(Offset)) diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_unsorted.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_unsorted.mir new file mode 100644 index 00000000000000..9be526ed608049 --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_unsorted.mir @@ -0,0 +1,85 @@ + +# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ +# RUN: %s -o - | FileCheck %s + +# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 7 +--- | + %struct.bar = type { i32, i32, i32 } + + define void @test4(i32 %n, ...) { + call void asm sideeffect "", ""() + ret void + } + + define void @square(%struct.bar* %ints) { + %a1 = bitcast %struct.bar* %ints to i32* + %1 = load i32, i32* %a1, align 4 + %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 + %2 = load i32, i32* %b, align 4 + %add = add nsw i32 %2, %1 + %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 + store i32 %add, i32* %c, align 4 + ret void + } + +... 
+--- +name: test4 +fixedStack: + - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', 
callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0 (%ir-block.0): + liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a5_nm, $a6_nm, $a7_nm + + SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm + CFI_INSTRUCTION def_cfa_offset 32 + SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) + SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32)) + SWs9_NM killed renamable $a5_nm, $sp_nm, 20 :: (store (s32)) + SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8) + SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) + SWs9_NM killed renamable $a4_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) + SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) + INLINEASM &"", 1 /* sideeffect attdialect */ + RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm + +... +--- +name: square +body: | + bb.0 (%ir-block.0): + liveins: $a0_nm + + renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) + renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) + renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm + SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c) + PseudoReturnNM undef $ra_nm + +... 
From eb7a730fdf71472b71608771d32e4023376fcd3c Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Mon, 13 Feb 2023 09:35:41 +0100 Subject: [PATCH 3/5] NanoMIPS: NMLoadStoreMultiple add gap support Beside regular instruction sequence, we're also looking for an instruction sequence that's "missing" one(or more) instruction to be complete and interchangeable with lwm/swm instruction. The sequence is optimizable if the Rt register in missing instruction is available. --- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 101 ++++++++++++++---- .../nanomips/loadstoremultiple_full_gap.mir | 84 +++++++++++++++ .../loadstoremultiple_multiple_full_gap.mir | 82 ++++++++++++++ 3 files changed, 246 insertions(+), 21 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_full_gap.mir create mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_multiple_full_gap.mir diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp index 9289645e2c60e2..5f299461f53df0 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -14,9 +14,11 @@ #include "Mips.h" #include "MipsSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/InitializePasses.h" #include @@ -36,8 +38,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned Rt; unsigned Rs; int64_t Offset; + MachineBasicBlock *MBB; LSIns(MachineInstr *MI) { + MBB = MI->getParent(); Rt = MI->getOperand(0).getReg().id(); Rs = MI->getOperand(1).getReg().id(); Offset = MI->getOperand(2).getImm(); @@ -48,6 +52,8 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { static char ID; const MipsSubtarget *STI; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + 
const MachineRegisterInfo *MRI; MCRegisterClass RC = MipsMCRegisterClasses[Mips::GPRNM32RegClassID]; DenseMap RegToIndexMap; @@ -62,7 +68,8 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; unsigned getRegNo(unsigned Reg); bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList); - bool isValidNextLoadStore(LSIns Prev, LSIns Next); + bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, + size_t &CurrSeqSize); bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad); }; @@ -75,6 +82,8 @@ bool NMLoadStoreMultipleOpt::runOnMachineFunction(MachineFunction &Fn) { return false; STI = &static_cast(Fn.getSubtarget()); TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + MRI = &Fn.getRegInfo(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { @@ -170,13 +179,45 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, return false; } -bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next) { +bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, + size_t &GapSize, + size_t &CurrSeqSize) { unsigned PrevRtNo = getRegNo(Prev.Rt); + unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0; + Register DesiredRtReg = RC.getRegister(DesiredRtNo); if (Next.Offset == Prev.Offset + 4) { - unsigned DesiredRtNo = PrevRtNo != 0 ? 
(PrevRtNo + 1) : 0; - if (Next.Rt != RC.getRegister(DesiredRtNo)) + // GAP, but offset ok + // lw a0, 8(a4) + // lw a1, 12(a4) + // lw a3, 16(a4) + if (Next.Rt != DesiredRtReg) { + // TODO return false; - return true; + } else { + return true; + } + } else { + // "full" GAP + // lw a0, 8(a4) + // lw a1, 12(a4) + // lw a3, 20(a4) + bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; + unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); + if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && + Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { + LivePhysRegs LiveRegs(*TRI); + computeLiveIns(LiveRegs, *Prev.MBB); + for (size_t i = 0; i < Gap; i++) { + assert(Register::isPhysicalRegister(DesiredRtNo + i) && + "Desired register is not physical!"); + if (!LiveRegs.available(*MRI, (DesiredRtReg))) + return false; + DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); + } + GapSize += Gap; + CurrSeqSize += Gap; + return true; + } } return false; } @@ -184,7 +225,10 @@ bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next) { bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad) { bool Modified = false; - + struct Candidate { + InstrList Sequence; + size_t GapSize; + }; InstrList SequenceToSort; SmallVector SequenceList; for (auto &MI : MBB) { @@ -201,49 +245,64 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, } } - SmallVector Candidates; + SmallVector Candidates; InstrList Sequence; - + size_t GapSize = 0; + size_t SeqSize = 0; for (size_t i = 0; i < SequenceList.size(); i++) { sortLoadStoreList(SequenceList[i], IsLoad); for (auto &MI : SequenceList[i]) { // Sequences cannot be longer than 8 instructions. - if (Sequence.size() == 8) { - Candidates.push_back(Sequence); + if (SeqSize == 8) { + Candidates.push_back({Sequence, GapSize}); Sequence.clear(); + GapSize = 0; + SeqSize = 0; } // When starting a new sequence, there's no need to do any checks. 
if (Sequence.empty()) { Sequence.push_back(MI); + SeqSize = 1; continue; } - if (!isValidNextLoadStore(Sequence.back(), MI)) { - if (Sequence.size() > 1) - Candidates.push_back(Sequence); + + if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) { + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); Sequence.clear(); + GapSize = 0; + SeqSize = 0; } Sequence.push_back(MI); + SeqSize++; continue; } // At least 2 instructions are neccessary for a valid sequence. - if (Sequence.size() > 1) - Candidates.push_back(Sequence); + if (SeqSize > 1) { + Candidates.push_back({Sequence, GapSize}); + SeqSize++; + } // Sequence has either ended or has never been started. - if (!Sequence.empty()) + if (!Sequence.empty()) { Sequence.clear(); + SeqSize = 0; + GapSize = 0; + } } // Make sure that the last sequence has been added to the Candidates list. // TODO: Check if needed. - if (Sequence.size() > 1) - Candidates.push_back(Sequence); + if (SeqSize > 1) { + Candidates.push_back({Sequence, GapSize}); + SeqSize++; + } - for (auto &Seq : Candidates) { + for (auto &C : Candidates) { + auto Seq = C.Sequence; assert(Seq.size() > 1 && Seq.size() < 9); - auto *Base = Seq.front(); int64_t Offset = Base->getOperand(2).getImm(); // Sequence cannot be merged, if the offset is out of range. @@ -257,7 +316,7 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, .addReg(Base->getOperand(0).getReg(), IsLoad ? 
RegState::Define : 0) .addReg(Base->getOperand(1).getReg()) .addImm(Offset) - .addImm(Seq.size()); + .addImm(Seq.size() + C.GapSize); BMI.cloneMergedMemRefs(Seq); for (auto *MI : Seq) { if (MI != Base) diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_full_gap.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_full_gap.mir new file mode 100644 index 00000000000000..cccb24a2853385 --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_full_gap.mir @@ -0,0 +1,84 @@ + +# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ +# RUN: %s -o - | FileCheck %s + +# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 7 +--- | + %struct.bar = type { i32, i32, i32 } + + define void @test4(i32 %n, ...) { + call void asm sideeffect "", ""() + ret void + } + + define void @square(%struct.bar* %ints) { + %a1 = bitcast %struct.bar* %ints to i32* + %1 = load i32, i32* %a1, align 4 + %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 + %2 = load i32, i32* %b, align 4 + %add = add nsw i32 %2, %1 + %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 + store i32 %add, i32* %c, align 4 + ret void + } + +... 
+--- +name: test4 +fixedStack: + - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', 
callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0 (%ir-block.0): + liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a6_nm, $a7_nm + + SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm + CFI_INSTRUCTION def_cfa_offset 32 + SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) + SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32)) + SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8) + SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) + SWs9_NM killed renamable $a4_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) + SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) + INLINEASM &"", 1 /* sideeffect attdialect */ + RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm + +... +--- +name: square +body: | + bb.0 (%ir-block.0): + liveins: $a0_nm + + renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) + renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) + renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm + SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c) + PseudoReturnNM undef $ra_nm + +... 
diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_multiple_full_gap.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_multiple_full_gap.mir new file mode 100644 index 00000000000000..fb5b615d5fbf8a --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_multiple_full_gap.mir @@ -0,0 +1,82 @@ + +# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ +# RUN: %s -o - | FileCheck %s + +# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 7 +--- | + %struct.bar = type { i32, i32, i32 } + + define void @test4(i32 %n, ...) { + call void asm sideeffect "", ""() + ret void + } + + define void @square(%struct.bar* %ints) { + %a1 = bitcast %struct.bar* %ints to i32* + %1 = load i32, i32* %a1, align 4 + %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 + %2 = load i32, i32* %b, align 4 + %add = add nsw i32 %2, %1 + %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 + store i32 %add, i32* %c, align 4 + ret void + } + +... 
+--- +name: test4 +fixedStack: + - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', 
callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0 (%ir-block.0): + liveins: $a1_nm, $a4_nm, $a6_nm, $a7_nm + + SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm + CFI_INSTRUCTION def_cfa_offset 32 + SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) + SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) + SWs9_NM killed renamable $a4_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) + SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) + INLINEASM &"", 1 /* sideeffect attdialect */ + RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm + +... +--- +name: square +body: | + bb.0 (%ir-block.0): + liveins: $a0_nm + + renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) + renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) + renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm + SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c) + PseudoReturnNM undef $ra_nm + +... From 5ea10a5a0c248e99953ec65b0e3fc893ef4d4a3b Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Tue, 14 Mar 2023 17:15:55 +0100 Subject: [PATCH 4/5] NanoMIPS: NMLoadStoreMultiple add reg gap support We're handling the situation where the instruction sequence is regular, except for one instruction having a "wrong" Rt register number. A sequence like that is optimizable if the register with the expected register number is available. In that case, we're emitting one additional move instruction after lwm/swm. 
--- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 181 ++++++++++-------- .../nanomips/loadstoremultiple_reg_gap.mir | 85 ++++++++ 2 files changed, 188 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp index 5f299461f53df0..7301a63008f835 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -39,8 +39,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned Rs; int64_t Offset; MachineBasicBlock *MBB; + MachineInstr *MI; LSIns(MachineInstr *MI) { + this->MI = MI; MBB = MI->getParent(); Rt = MI->getOperand(0).getReg().id(); Rs = MI->getOperand(1).getReg().id(); @@ -49,6 +51,12 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { }; using InstrList = SmallVector; using MBBIter = MachineBasicBlock::iterator; + struct Candidate { + InstrList Sequence; + size_t GapSize; + bool Move = false; + }; + using CandidateList = SmallVector; static char ID; const MipsSubtarget *STI; const TargetInstrInfo *TII; @@ -69,9 +77,11 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned getRegNo(unsigned Reg); bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList); bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize); + size_t &CurrSeqSize, bool &RegGap); bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad); + void findCandidatesForOptimization(InstrList &LoadStoreList, + CandidateList &Candidates); }; } // namespace @@ -131,6 +141,53 @@ void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList, std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions); } +void NMLoadStoreMultipleOpt::findCandidatesForOptimization( + InstrList &LoadStoreList, 
CandidateList &Candidates) { + InstrList Sequence; + size_t GapSize = 0, SeqSize = 0; + bool RegGap = false; + + auto clearSeqence = [&Sequence, &GapSize, &SeqSize, &RegGap]() { + Sequence.clear(); + GapSize = 0; + SeqSize = 0; + RegGap = false; + }; + + for (auto &MI : LoadStoreList) { + // Sequences cannot be longer than 8 instructions. + if (SeqSize == 8) { + Candidates.push_back({Sequence, GapSize}); + clearSeqence(); + } + // When starting a new sequence, there's no need to do any checks. + if (Sequence.empty()) { + Sequence.push_back(MI); + SeqSize = 1; + continue; + } + + if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize, RegGap)) { + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); + clearSeqence(); + } + + Sequence.push_back(MI); + SeqSize++; + + if (RegGap) { + Candidates.push_back({Sequence, GapSize, true}); + clearSeqence(); + } + } + + // Save the last valid sequence for this list. At least 2 instructions are + // neccessary for a valid sequence. + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); +} + // All instruction in the seqence should have the same Rs register, and // different Rt register. bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, @@ -181,43 +238,52 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize) { + size_t &CurrSeqSize, + bool &RegGap) { unsigned PrevRtNo = getRegNo(Prev.Rt); unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0; Register DesiredRtReg = RC.getRegister(DesiredRtNo); if (Next.Offset == Prev.Offset + 4) { + if (Next.Rt == DesiredRtReg) + return true; + // Next.Rt != DesiredRtReg // GAP, but offset ok // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 16(a4) - if (Next.Rt != DesiredRtReg) { - // TODO + // For now, the instruction like lw a3, 16(a4) interrupts the sequence. 
+ if (CurrSeqSize < 2) return false; - } else { - return true; - } - } else { + + assert(Register::isPhysicalRegister(DesiredRtNo) && + "Desired register is not physical!"); + if (MachineBasicBlock::LQR_Dead != + Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) + return false; + + RegGap = true; + return true; + } + // Next.Offset != Prev.Offset + 4 + bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; + unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); + if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && + Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { // "full" GAP // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 20(a4) - bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; - unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); - if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && - Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { - LivePhysRegs LiveRegs(*TRI); - computeLiveIns(LiveRegs, *Prev.MBB); - for (size_t i = 0; i < Gap; i++) { - assert(Register::isPhysicalRegister(DesiredRtNo + i) && - "Desired register is not physical!"); - if (!LiveRegs.available(*MRI, (DesiredRtReg))) - return false; - DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); - } - GapSize += Gap; - CurrSeqSize += Gap; - return true; + for (size_t i = 0; i < Gap; i++) { + assert(Register::isPhysicalRegister(DesiredRtNo + i) && + "Desired register is not physical!"); + if (MachineBasicBlock::LQR_Dead != + Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) + return false; + DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); } + GapSize += Gap; + CurrSeqSize += Gap; + return true; } return false; } @@ -225,10 +291,11 @@ bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad) { bool Modified = false; - struct Candidate { - InstrList Sequence; - size_t GapSize; - }; + + // TODO: Consider allowing interspersed arithmetic/logical operations in + // 
load/store sequences to reduce sensitivity to instruction ordering. Note + // that proper scheduling models will alter instruction order, increasing + // mixed memory and compute operations. Dependency checks will be required. InstrList SequenceToSort; SmallVector SequenceList; for (auto &MI : MBB) { @@ -245,59 +312,11 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, } } - SmallVector Candidates; + CandidateList Candidates; InstrList Sequence; - size_t GapSize = 0; - size_t SeqSize = 0; for (size_t i = 0; i < SequenceList.size(); i++) { sortLoadStoreList(SequenceList[i], IsLoad); - for (auto &MI : SequenceList[i]) { - // Sequences cannot be longer than 8 instructions. - if (SeqSize == 8) { - Candidates.push_back({Sequence, GapSize}); - Sequence.clear(); - GapSize = 0; - SeqSize = 0; - } - // When starting a new sequence, there's no need to do any checks. - if (Sequence.empty()) { - Sequence.push_back(MI); - SeqSize = 1; - continue; - } - - if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) { - if (SeqSize > 1) - Candidates.push_back({Sequence, GapSize}); - Sequence.clear(); - GapSize = 0; - SeqSize = 0; - } - - Sequence.push_back(MI); - SeqSize++; - continue; - } - - // At least 2 instructions are neccessary for a valid sequence. - if (SeqSize > 1) { - Candidates.push_back({Sequence, GapSize}); - SeqSize++; - } - - // Sequence has either ended or has never been started. - if (!Sequence.empty()) { - Sequence.clear(); - SeqSize = 0; - GapSize = 0; - } - } - - // Make sure that the last sequence has been added to the Candidates list. - // TODO: Check if needed. 
- if (SeqSize > 1) { - Candidates.push_back({Sequence, GapSize}); - SeqSize++; + findCandidatesForOptimization(SequenceList[i], Candidates); } for (auto &C : Candidates) { @@ -318,6 +337,12 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, .addImm(Offset) .addImm(Seq.size() + C.GapSize); BMI.cloneMergedMemRefs(Seq); + if (C.Move) { + BuildMI(MBB, std::next(MBBIter(BMI.getInstr())), Base->getDebugLoc(), + TII->get(Mips::MOVE_NM)) + .addReg(Seq.back()->getOperand(0).getReg(), RegState::Define) + .addReg(Seq[Seq.size() - 2]->getOperand(0).getReg() + 1); + } for (auto *MI : Seq) { if (MI != Base) BMI.addReg(MI->getOperand(0).getReg(), diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir new file mode 100644 index 00000000000000..a5d16fbd8814fc --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir @@ -0,0 +1,85 @@ + +# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ +# RUN: %s -o - | FileCheck %s + +# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 4 +# CHECK-NEXT: $a5_nm = MOVE_NM $a4_nm +--- | + %struct.bar = type { i32, i32, i32 } + + define void @test4(i32 %n, ...) { + call void asm sideeffect "", ""() + ret void + } + + define void @square(%struct.bar* %ints) { + %a1 = bitcast %struct.bar* %ints to i32* + %1 = load i32, i32* %a1, align 4 + %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 + %2 = load i32, i32* %b, align 4 + %add = add nsw i32 %2, %1 + %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 + store i32 %add, i32* %c, align 4 + ret void + } + +... 
+--- +name: test4 +fixedStack: + - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', 
callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0 (%ir-block.0): + liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a5_nm, $a6_nm, $a7_nm + + SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm + CFI_INSTRUCTION def_cfa_offset 32 + SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) + SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32)) + SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8) + SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) + SWs9_NM killed renamable $a5_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) + SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) + INLINEASM &"", 1 /* sideeffect attdialect */ + RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm + +... +--- +name: square +body: | + bb.0 (%ir-block.0): + liveins: $a0_nm + + renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) + renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) + renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm + SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c) + PseudoReturnNM undef $ra_nm + +... From fb95d205949cedbb29deec60f1c7dd071b7a9bc9 Mon Sep 17 00:00:00 2001 From: Milica Lazarevic Date: Thu, 19 Sep 2024 20:24:59 +0200 Subject: [PATCH 5/5] Revert "NanoMIPS: NMLoadStoreMultiple add reg gap support" This reverts commit 5ea10a5a0c248e99953ec65b0e3fc893ef4d4a3b. 
--- .../Target/Mips/NanoMipsLoadStoreMultiple.cpp | 181 ++++++++---------- .../nanomips/loadstoremultiple_reg_gap.mir | 85 -------- 2 files changed, 78 insertions(+), 188 deletions(-) delete mode 100644 llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir diff --git a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp index 7301a63008f835..5f299461f53df0 100644 --- a/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp +++ b/llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp @@ -39,10 +39,8 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned Rs; int64_t Offset; MachineBasicBlock *MBB; - MachineInstr *MI; LSIns(MachineInstr *MI) { - this->MI = MI; MBB = MI->getParent(); Rt = MI->getOperand(0).getReg().id(); Rs = MI->getOperand(1).getReg().id(); @@ -51,12 +49,6 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { }; using InstrList = SmallVector; using MBBIter = MachineBasicBlock::iterator; - struct Candidate { - InstrList Sequence; - size_t GapSize; - bool Move = false; - }; - using CandidateList = SmallVector; static char ID; const MipsSubtarget *STI; const TargetInstrInfo *TII; @@ -77,11 +69,9 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass { unsigned getRegNo(unsigned Reg); bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList); bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize, bool &RegGap); + size_t &CurrSeqSize); bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad); void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad); - void findCandidatesForOptimization(InstrList &LoadStoreList, - CandidateList &Candidates); }; } // namespace @@ -141,53 +131,6 @@ void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList, std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions); } -void NMLoadStoreMultipleOpt::findCandidatesForOptimization( - InstrList &LoadStoreList, 
CandidateList &Candidates) { - InstrList Sequence; - size_t GapSize = 0, SeqSize = 0; - bool RegGap = false; - - auto clearSeqence = [&Sequence, &GapSize, &SeqSize, &RegGap]() { - Sequence.clear(); - GapSize = 0; - SeqSize = 0; - RegGap = false; - }; - - for (auto &MI : LoadStoreList) { - // Sequences cannot be longer than 8 instructions. - if (SeqSize == 8) { - Candidates.push_back({Sequence, GapSize}); - clearSeqence(); - } - // When starting a new sequence, there's no need to do any checks. - if (Sequence.empty()) { - Sequence.push_back(MI); - SeqSize = 1; - continue; - } - - if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize, RegGap)) { - if (SeqSize > 1) - Candidates.push_back({Sequence, GapSize}); - clearSeqence(); - } - - Sequence.push_back(MI); - SeqSize++; - - if (RegGap) { - Candidates.push_back({Sequence, GapSize, true}); - clearSeqence(); - } - } - - // Save the last valid sequence for this list. At least 2 instructions are - // neccessary for a valid sequence. - if (SeqSize > 1) - Candidates.push_back({Sequence, GapSize}); -} - // All instruction in the seqence should have the same Rs register, and // different Rt register. bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, @@ -238,52 +181,43 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad, bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize, - size_t &CurrSeqSize, - bool &RegGap) { + size_t &CurrSeqSize) { unsigned PrevRtNo = getRegNo(Prev.Rt); unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0; Register DesiredRtReg = RC.getRegister(DesiredRtNo); if (Next.Offset == Prev.Offset + 4) { - if (Next.Rt == DesiredRtReg) - return true; - // Next.Rt != DesiredRtReg // GAP, but offset ok // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 16(a4) - // For now, the instruction like lw a3, 16(a4) interrupts the sequence. 
- if (CurrSeqSize < 2) + if (Next.Rt != DesiredRtReg) { + // TODO return false; - - assert(Register::isPhysicalRegister(DesiredRtNo) && - "Desired register is not physical!"); - if (MachineBasicBlock::LQR_Dead != - Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) - return false; - - RegGap = true; - return true; - } - // Next.Offset != Prev.Offset + 4 - bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; - unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); - if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && - Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { + } else { + return true; + } + } else { // "full" GAP // lw a0, 8(a4) // lw a1, 12(a4) // lw a3, 20(a4) - for (size_t i = 0; i < Gap; i++) { - assert(Register::isPhysicalRegister(DesiredRtNo + i) && - "Desired register is not physical!"); - if (MachineBasicBlock::LQR_Dead != - Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI)) - return false; - DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); + bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0; + unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1); + if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) && + Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) { + LivePhysRegs LiveRegs(*TRI); + computeLiveIns(LiveRegs, *Prev.MBB); + for (size_t i = 0; i < Gap; i++) { + assert(Register::isPhysicalRegister(DesiredRtNo + i) && + "Desired register is not physical!"); + if (!LiveRegs.available(*MRI, (DesiredRtReg))) + return false; + DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1); + } + GapSize += Gap; + CurrSeqSize += Gap; + return true; } - GapSize += Gap; - CurrSeqSize += Gap; - return true; } return false; } @@ -291,11 +225,10 @@ bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next, bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad) { bool Modified = false; - - // TODO: Consider allowing interspersed arithmetic/logical operations in - // load/store sequences to reduce 
sensitivity to instruction ordering. Note - that proper scheduling models will alter instruction order, increasing - mixed memory and compute operations. Dependency checks will be required. + struct Candidate { + InstrList Sequence; + size_t GapSize; + }; InstrList SequenceToSort; SmallVector SequenceList; for (auto &MI : MBB) { @@ -312,11 +245,59 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, } } - CandidateList Candidates; + SmallVector Candidates; InstrList Sequence; + size_t GapSize = 0; + size_t SeqSize = 0; for (size_t i = 0; i < SequenceList.size(); i++) { sortLoadStoreList(SequenceList[i], IsLoad); - findCandidatesForOptimization(SequenceList[i], Candidates); + for (auto &MI : SequenceList[i]) { + // Sequences cannot be longer than 8 instructions. + if (SeqSize == 8) { + Candidates.push_back({Sequence, GapSize}); + Sequence.clear(); + GapSize = 0; + SeqSize = 0; + } + // When starting a new sequence, there's no need to do any checks. + if (Sequence.empty()) { + Sequence.push_back(MI); + SeqSize = 1; + continue; + } + + if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) { + if (SeqSize > 1) + Candidates.push_back({Sequence, GapSize}); + Sequence.clear(); + GapSize = 0; + SeqSize = 0; + } + + Sequence.push_back(MI); + SeqSize++; + continue; + } + + // At least 2 instructions are necessary for a valid sequence. + if (SeqSize > 1) { + Candidates.push_back({Sequence, GapSize}); + SeqSize++; + } + + // Sequence has either ended or has never been started. + if (!Sequence.empty()) { + Sequence.clear(); + SeqSize = 0; + GapSize = 0; + } + } + + // Make sure that the last sequence has been added to the Candidates list. + // TODO: Check if needed. 
+ if (SeqSize > 1) { + Candidates.push_back({Sequence, GapSize}); + SeqSize++; } for (auto &C : Candidates) { @@ -337,12 +318,6 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB, .addImm(Offset) .addImm(Seq.size() + C.GapSize); BMI.cloneMergedMemRefs(Seq); - if (C.Move) { - BuildMI(MBB, std::next(MBBIter(BMI.getInstr())), Base->getDebugLoc(), - TII->get(Mips::MOVE_NM)) - .addReg(Seq.back()->getOperand(0).getReg(), RegState::Define) - .addReg(Seq[Seq.size() - 2]->getOperand(0).getReg() + 1); - } for (auto *MI : Seq) { if (MI != Base) BMI.addReg(MI->getOperand(0).getReg(), diff --git a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir b/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir deleted file mode 100644 index a5d16fbd8814fc..00000000000000 --- a/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir +++ /dev/null @@ -1,85 +0,0 @@ - -# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \ -# RUN: %s -o - | FileCheck %s - -# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 4 -# CHECK-NEXT: $a5_nm = MOVE_NM $a4_nm ---- | - %struct.bar = type { i32, i32, i32 } - - define void @test4(i32 %n, ...) { - call void asm sideeffect "", ""() - ret void - } - - define void @square(%struct.bar* %ints) { - %a1 = bitcast %struct.bar* %ints to i32* - %1 = load i32, i32* %a1, align 4 - %b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1 - %2 = load i32, i32* %b, align 4 - %add = add nsw i32 %2, %1 - %c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2 - store i32 %add, i32* %c, align 4 - ret void - } - -... 
---- -name: test4 -fixedStack: - - { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', 
callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -body: | - bb.0 (%ir-block.0): - liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a5_nm, $a6_nm, $a7_nm - - SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm - CFI_INSTRUCTION def_cfa_offset 32 - SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32)) - SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32)) - SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8) - SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8) - SWs9_NM killed renamable $a5_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16) - SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32)) - INLINEASM &"", 1 /* sideeffect attdialect */ - RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm - -... ---- -name: square -body: | - bb.0 (%ir-block.0): - liveins: $a0_nm - - renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1) - renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b) - renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm - SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c) - PseudoReturnNM undef $ra_nm - -...