Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CilkABI] Fix declaration of __cilkrts_get_nworkers() to use MemoryEf… #280

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Tapir/CilkABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,13 @@ FunctionCallee CilkABI::Get__cilkrts_get_nworkers() {

LLVMContext &C = M.getContext();
AttributeList AL;
AL = AL.addFnAttribute(C, Attribute::ReadNone);
AL = AL.addFnAttribute(C, Attribute::NoUnwind);
FunctionType *FTy = FunctionType::get(Type::getInt32Ty(C), {}, false);
CilkRTSGetNworkers = M.getOrInsertFunction("__cilkrts_get_nworkers", FTy, AL);
if (Function *Fn = dyn_cast<Function>(CilkRTSGetNworkers.getCallee())) {
MemoryEffects ME = MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref);
Fn->setMemoryEffects(ME);
}
return CilkRTSGetNworkers;
}

Expand Down
105 changes: 105 additions & 0 deletions llvm/test/Transforms/Tapir/cilkplus-get-nworkers.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
; Check that the Intel Cilk Plus function __cilkrts_get_nworkers()
; is properly declared in the IR.
;
; RUN: opt < %s -passes="tapir2target" -tapir-target=cilk -S | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-macosx15.0.0"

; Function Attrs: nounwind ssp uwtable(sync)
define void @normalize(ptr noalias nocapture noundef writeonly %out, ptr noalias noundef %in, i32 noundef %n) local_unnamed_addr #0 {
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %pfor.cond.preheader, label %cleanup

pfor.cond.preheader: ; preds = %entry
%wide.trip.count = zext nneg i32 %n to i64
%0 = call i64 @llvm.tapir.loop.grainsize.i64(i64 %wide.trip.count)
call fastcc void @normalize.outline_pfor.cond.ls1(i64 0, i64 %wide.trip.count, i64 %0, ptr %in, ptr %out, i32 %n) #4
br label %cleanup

cleanup: ; preds = %pfor.cond.preheader, %entry
ret void
}

; CHECK: define {{.*}}void @normalize(
; CHECK: call i32 @__cilkrts_get_nworkers()
; CHECK: call {{.*}}void @normalize.outline_pfor.cond.ls1(

; CHECK: declare i32 @__cilkrts_get_nworkers()

; Function Attrs: nounwind willreturn memory(argmem: readwrite)
declare token @llvm.syncregion.start() #1

declare double @norm(ptr noundef, i32 noundef) local_unnamed_addr #2

; Function Attrs: nounwind speculatable willreturn memory(none)
declare i64 @llvm.tapir.loop.grainsize.i64(i64) #3

; Function Attrs: nounwind ssp uwtable(sync)
define internal fastcc void @normalize.outline_pfor.cond.ls1(i64 %indvars.iv.start.ls1, i64 %end.ls1, i64 %grainsize.ls1, ptr noalias noundef align 1 %in.ls1, ptr noalias nocapture noundef writeonly align 1 %out.ls1, i32 noundef %n.ls1) unnamed_addr #0 {
pfor.cond.preheader.ls1:
%0 = tail call token @llvm.syncregion.start()
%itercount1 = sub i64 %end.ls1, %indvars.iv.start.ls1
%1 = icmp ugt i64 %itercount1, %grainsize.ls1
br i1 %1, label %.lr.ph, label %pfor.cond.ls1.preheader

pfor.cond.ls1.preheader: ; preds = %pfor.cond.preheader.ls1.dac.cont, %pfor.cond.preheader.ls1
%indvars.iv.ls1.dac.lcssa = phi i64 [ %indvars.iv.start.ls1, %pfor.cond.preheader.ls1 ], [ %miditer, %pfor.cond.preheader.ls1.dac.cont ]
br label %pfor.cond.ls1

.lr.ph: ; preds = %pfor.cond.preheader.ls1, %pfor.cond.preheader.ls1.dac.cont
%itercount3 = phi i64 [ %itercount, %pfor.cond.preheader.ls1.dac.cont ], [ %itercount1, %pfor.cond.preheader.ls1 ]
%indvars.iv.ls1.dac2 = phi i64 [ %miditer, %pfor.cond.preheader.ls1.dac.cont ], [ %indvars.iv.start.ls1, %pfor.cond.preheader.ls1 ]
%halfcount = lshr i64 %itercount3, 1
%miditer = add nuw nsw i64 %halfcount, %indvars.iv.ls1.dac2
detach within %0, label %pfor.cond.preheader.ls1.dac.detach, label %pfor.cond.preheader.ls1.dac.cont

pfor.cond.preheader.ls1.dac.detach: ; preds = %.lr.ph
call fastcc void @normalize.outline_pfor.cond.ls1(i64 %indvars.iv.ls1.dac2, i64 %miditer, i64 %grainsize.ls1, ptr %in.ls1, ptr %out.ls1, i32 %n.ls1) #4
reattach within %0, label %pfor.cond.preheader.ls1.dac.cont

pfor.cond.preheader.ls1.dac.cont: ; preds = %pfor.cond.preheader.ls1.dac.detach, %.lr.ph
%itercount = sub i64 %end.ls1, %miditer
%2 = icmp ugt i64 %itercount, %grainsize.ls1
br i1 %2, label %.lr.ph, label %pfor.cond.ls1.preheader

pfor.cond.ls1: ; preds = %pfor.cond.ls1.preheader, %pfor.cond.ls1
%indvars.iv.ls1 = phi i64 [ %indvars.iv.next.ls1, %pfor.cond.ls1 ], [ %indvars.iv.ls1.dac.lcssa, %pfor.cond.ls1.preheader ]
%indvars.iv.next.ls1 = add nuw nsw i64 %indvars.iv.ls1, 1
%arrayidx.ls1 = getelementptr inbounds double, ptr %in.ls1, i64 %indvars.iv.ls1
%3 = load double, ptr %arrayidx.ls1, align 8, !tbaa !6
%call.ls1 = tail call double @norm(ptr noundef %in.ls1, i32 noundef %n.ls1) #4
%div.ls1 = fdiv double %3, %call.ls1
%arrayidx2.ls1 = getelementptr inbounds double, ptr %out.ls1, i64 %indvars.iv.ls1
store double %div.ls1, ptr %arrayidx2.ls1, align 8, !tbaa !6
%exitcond.not.ls1 = icmp eq i64 %indvars.iv.next.ls1, %end.ls1
br i1 %exitcond.not.ls1, label %pfor.cond.cleanup.ls1, label %pfor.cond.ls1, !llvm.loop !10

pfor.cond.cleanup.ls1: ; preds = %pfor.cond.ls1
sync within %0, label %pfor.cond.cleanup.ls1.synced

pfor.cond.cleanup.ls1.synced: ; preds = %pfor.cond.cleanup.ls1
ret void
}

attributes #0 = { nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
attributes #1 = { nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
attributes #3 = { nounwind speculatable willreturn memory(none) }
attributes #4 = { nounwind }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 8, !"PIC Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 1}
!4 = !{i32 7, !"frame-pointer", i32 1}
!5 = !{!"clang version 18.1.8 ([email protected]:OpenCilk/opencilk-project.git f78505e37b91651d57f439f34cbcb441c98b59a1)"}
!6 = !{!7, !7, i64 0}
!7 = !{!"double", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C/C++ TBAA"}
!10 = distinct !{!10, !11}
!11 = !{!"tapir.loop.spawn.strategy", i32 0}
Loading