Skip to content

Commit

Permalink
[LoopStripMine] Fix loop stripmining to handle Tapir loops where the …
Browse files Browse the repository at this point in the history
…tripcount and primary induction variable have different types.
  • Loading branch information
neboat committed Feb 17, 2021
1 parent 97e3422 commit fcfffe9
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 3 deletions.
14 changes: 13 additions & 1 deletion llvm/lib/Transforms/Tapir/LoopStripMine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,17 @@ static Task *getTapirLoopForStripMining(const Loop *L, TaskInfo &TI,
return nullptr;
}

// Tapir loops where the loop body does not reattach cannot be stripmined.
if (!llvm::any_of(predecessors(LatchBlock), [](const BasicBlock *B) {
return isa<ReattachInst>(B->getTerminator());
})) {
LLVM_DEBUG(dbgs() << " Can't stripmine: loop body does not reattach.\n");
if (ORE)
ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoReattach", L)
<< "spawned loop body does not reattach");
return nullptr;
}

// The current loop-stripmine pass can only stripmine loops with a single
// latch that's a conditional branch exiting the loop.
// FIXME: The implementation can be extended to work with more complicated
Expand Down Expand Up @@ -1175,6 +1186,7 @@ Loop *llvm::StripMineLoop(
ReattachDom = DT->findNearestCommonDominator(ReattachDom, I->getParent());
ReplaceInstWithInst(I, BranchInst::Create(Latch));
}
assert(ReattachDom && "No reattach-dominator block found");
// Insert a reattach at the end of NewReattB.
ReplaceInstWithInst(NewReattB->getTerminator(),
ReattachInst::Create(NewLatch, NewSyncReg));
Expand Down Expand Up @@ -1330,7 +1342,7 @@ Loop *llvm::StripMineLoop(
// Update all of the old PHI nodes
B2.SetInsertPoint(NewEntry->getTerminator());
Instruction *CountVal = cast<Instruction>(
B2.CreateMul(ConstantInt::get(PrimaryInduction->getType(), Count),
B2.CreateMul(ConstantInt::get(NewIdx->getType(), Count),
NewIdx));
CountVal->copyIRFlags(PrimaryInduction);
for (auto &InductionEntry : *TL.getInductionVars()) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ static bool tryToStripMineLoop(
return false;

// Copy metadata to remainder loop
if (RemainderLoop) {
if (RemainderLoop && OrigLoopID) {
// Optional<MDNode *> RemainderLoopID = makeFollowupLoopID(
// OrigLoopID, {}, "tapir.loop");
MDNode *NewRemainderLoopID =
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/TapirUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry,

// Update dominator tree.
if (DT) {
if (DT->dominates(Spawner, Continue))
if (ReattachDom && DT->dominates(Spawner, Continue))
DT->changeImmediateDominator(Continue, ReattachDom);
if (DI->hasUnwindDest())
DT->deleteEdge(Spawner, Unwind);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; Check that loop stripmining properly handles Tapir loops where the
; primary IV and the tripcount have different types.
;
; RUN: opt < %s -loop-stripmine -S -o - | FileCheck %s
; RUN: opt < %s -passes='loop-stripmine' -S -o - | FileCheck %s
; Target configuration for this test: x86-64 Linux.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Intrinsic returning the token that the detach, reattach, and sync
; instructions in the test function below name in their "within" clause.
; Function Attrs: argmemonly nounwind willreturn
declare token @llvm.syncregion.start() #0

; Test input for loop stripmining. The first six detaches each spawn a block
; that ends in unreachable, and each is followed by a sync on the same sync
; region. Only the final loop (pfor.cond157) has a spawned body that
; reattaches, and it is the loop the expected-output patterns below refer to.
define dso_local fastcc void @_ZL9conj_gradPiS_PdS0_S0_S0_S0_S0_S0_S0_() unnamed_addr #1 {
entry:
; Single sync region shared by every detach, reattach, and sync below.
%syncreg143 = tail call token @llvm.syncregion.start()
detach within %syncreg143, label %pfor.body.i, label %pfor.inc.i

pfor.body.i: ; preds = %entry
unreachable

pfor.inc.i: ; preds = %entry
sync within %syncreg143, label %_Z10initializePdS_S_S_S_mm.exit

_Z10initializePdS_S_S_S_mm.exit: ; preds = %pfor.inc.i
detach within %syncreg143, label %pfor.body, label %pfor.inc

pfor.body: ; preds = %_Z10initializePdS_S_S_S_mm.exit
unreachable

pfor.inc: ; preds = %_Z10initializePdS_S_S_S_mm.exit
sync within %syncreg143, label %sync.continue

sync.continue: ; preds = %pfor.inc
detach within %syncreg143, label %pfor.body.i122, label %pfor.inc.i124

pfor.body.i122: ; preds = %sync.continue
unreachable

pfor.inc.i124: ; preds = %sync.continue
sync within %syncreg143, label %pfor.cond.i136

pfor.cond.i136: ; preds = %pfor.inc.i124
detach within %syncreg143, label %pfor.body.i142, label %pfor.inc.i144

pfor.body.i142: ; preds = %pfor.cond.i136
unreachable

pfor.inc.i144: ; preds = %pfor.cond.i136
sync within %syncreg143, label %cleanup.tf.tfend

cleanup.tf.tfend: ; preds = %pfor.inc.i144
detach within %syncreg143, label %pfor.body.i105, label %pfor.inc.i107

pfor.body.i105: ; preds = %cleanup.tf.tfend
unreachable

pfor.inc.i107: ; preds = %cleanup.tf.tfend
sync within %syncreg143, label %_Z11map_add_mulPdS_S_dmm.exit.tfend.tfend

_Z11map_add_mulPdS_S_dmm.exit.tfend.tfend: ; preds = %pfor.inc.i107
detach within %syncreg143, label %pfor.body73, label %pfor.inc101

pfor.body73: ; preds = %_Z11map_add_mulPdS_S_dmm.exit.tfend.tfend
unreachable

pfor.inc101: ; preds = %_Z11map_add_mulPdS_S_dmm.exit.tfend.tfend
sync within %syncreg143, label %sync.continue106

sync.continue106: ; preds = %pfor.inc101
br label %pfor.cond157

; Loop header. The induction variable is an i64 phi that starts at 0 and is
; incremented by 1 each iteration; the header detaches the loop body.
pfor.cond157: ; preds = %pfor.inc177, %sync.continue106
%indvars.iv164 = phi i64 [ 0, %sync.continue106 ], [ %indvars.iv.next165, %pfor.inc177 ]
%indvars.iv.next165 = add nuw nsw i64 %indvars.iv164, 1
detach within %syncreg143, label %pfor.body163, label %pfor.inc177

; Spawned loop body: empty, it reattaches to the latch immediately.
pfor.body163: ; preds = %pfor.cond157
reattach within %syncreg143, label %pfor.inc177

; Latch. The exit test is done in i32 on a truncation of the incremented i64
; induction variable, so the compared value is narrower than the induction
; variable itself. The bound it is compared against is undef.
pfor.inc177: ; preds = %pfor.body163, %pfor.cond157
%lftr.wideiv = trunc i64 %indvars.iv.next165 to i32
%exitcond = icmp eq i32 undef, %lftr.wideiv
br i1 %exitcond, label %pfor.cond.cleanup180, label %pfor.cond157

; Expected output, outer stripmined loop: its iteration counter is an i32 phi,
; the outer spawned body multiplies that counter by the i32 constant 2048, and
; the outer latch increments the counter by 1.
; CHECK: pfor.cond157.strpm.outer:
; CHECK: %[[NITER:.+]] = phi i32 [ 0, %sync.continue106.new ], [ %[[NITER_ADD:.+]], %pfor.inc177.strpm.outer ]
; CHECK: detach within %syncreg143, label %pfor.body163.strpm.outer, label %pfor.inc177.strpm.outer

; CHECK: pfor.body163.strpm.outer:
; CHECK: mul i32 2048, %[[NITER]]

; CHECK: pfor.inc177.strpm.outer:
; CHECK: %[[NITER_ADD]] = add {{.*}}i32 %[[NITER]], 1

; Expected output, epilog loop: it carries an i64 phi together with an i32
; counter that its latch decrements by 1 and compares against 0.
; CHECK: pfor.cond157.epil:
; CHECK: %[[INDVAR_EPIL:.+]] = phi i64
; CHECK: %[[EPIL_ITER:.+]] = phi i32

; CHECK: pfor.inc177.epil:
; CHECK: %[[EPIL_ITER_SUB:.+]] = sub i32 %[[EPIL_ITER]], 1
; CHECK: icmp ne i32 %[[EPIL_ITER_SUB]], 0

; Exit block of the loop above.
pfor.cond.cleanup180: ; preds = %pfor.inc177
unreachable
}

; Attribute group #0 is referenced by the @llvm.syncregion.start declaration;
; group #1 is referenced by the test function definition.
attributes #0 = { argmemonly nounwind willreturn }
attributes #1 = { "use-soft-float"="false" }

; Records the compiler version string that produced the original IR.
!llvm.ident = !{!0}

!0 = !{!"clang version 10.0.1 ([email protected]:neboat/opencilk-project.git 2c7e581b441a9ae5682f02090613d00aaa26460d)"}

0 comments on commit fcfffe9

Please sign in to comment.