From 5fb178bd53b19eb1e61dfe6962b5e9e989bb686c Mon Sep 17 00:00:00 2001
From: Marcel Koester
Date: Sun, 1 Oct 2023 20:23:24 +0200
Subject: [PATCH 1/6] Added Velocity intrinsic wrapper class.

---
 .../Backends/Velocity/VelocityIntrinsic.cs | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 Src/ILGPU/Backends/Velocity/VelocityIntrinsic.cs

diff --git a/Src/ILGPU/Backends/Velocity/VelocityIntrinsic.cs b/Src/ILGPU/Backends/Velocity/VelocityIntrinsic.cs
new file mode 100644
index 000000000..15a4a7f7b
--- /dev/null
+++ b/Src/ILGPU/Backends/Velocity/VelocityIntrinsic.cs
@@ -0,0 +1,67 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: VelocityIntrinsic.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.IR.Intrinsics;
+using System;
+using System.Reflection;
+
+namespace ILGPU.Backends.Velocity
+{
+    /// <summary>
+    /// Represents a specific handler for user defined code-generation functionality
+    /// that is compatible with the Velocity backend.
+    /// </summary>
+    public sealed class VelocityIntrinsic : IntrinsicImplementation
+    {
+        #region Instance
+
+        /// <summary>
+        /// Constructs a new Velocity intrinsic.
+        /// </summary>
+        /// <param name="targetMethod">The associated target method.</param>
+        /// <param name="mode">The code-generation mode.</param>
+        public VelocityIntrinsic(
+            MethodInfo targetMethod,
+            IntrinsicImplementationMode mode)
+            : base(
+                  BackendType.Velocity,
+                  targetMethod,
+                  mode)
+        { }
+
+        /// <summary>
+        /// Constructs a new Velocity intrinsic.
+        /// </summary>
+        /// <param name="handlerType">The associated target handler type.</param>
+        /// <param name="methodName">The target method name (or null).</param>
+        /// <param name="mode">The code-generation mode.</param>
+        public VelocityIntrinsic(
+            Type handlerType,
+            string methodName,
+            IntrinsicImplementationMode mode)
+            : base(
+                  BackendType.Velocity,
+                  handlerType,
+                  methodName,
+                  mode)
+        { }
+
+        #endregion
+
+        #region Methods
+
+        /// <summary>Accepts only backends whose type is BackendType.Velocity.</summary>
+        protected internal override bool CanHandleBackend(Backend backend) =>
+            backend.BackendType == BackendType.Velocity;
+
+        #endregion
+    }
+}
From fb62ac976d3d79a6d171c49824c36d8fb5caacfa Mon Sep 17 00:00:00 2001
From: Marcel Koester
Date: Sun, 1 Oct 2023 20:23:24 +0200
Subject: [PATCH 2/6] Added VelocityIntrinsics wrappers to remap warp and group operations.

---
 .gitignore | 1 +
 .../Velocity/VelocityIntrinsics.Generated.tt | 121 ++++++++++++++++++
 .../Backends/Velocity/VelocityIntrinsics.cs | 79 ++++++++++++
 3 files changed, 201 insertions(+)
 create mode 100644 Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.tt
 create mode 100644 Src/ILGPU/Backends/Velocity/VelocityIntrinsics.cs

diff --git a/.gitignore b/.gitignore
index 0fb0024c3..5f0d1e8fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -258,6 +258,7 @@ Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.cs
 Src/ILGPU/Backends/PTX/PTXLibDeviceMethods.cs
 Src/ILGPU/Backends/PTX/PTXLibDeviceNvvm.cs
 Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.cs
+Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.cs
 Src/ILGPU/Frontend/Intrinsic/RemappedIntrinsics.Generated.cs
 Src/ILGPU/HalfConversion.cs
 Src/ILGPU/IR/Construction/ArithmeticOperations.cs
diff --git a/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.tt b/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.tt
new file mode 100644
index 000000000..5ad06c4ed
--- /dev/null
+++ b/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.tt
@@ -0,0 +1,121 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: VelocityIntrinsics.Generated.tt/VelocityIntrinsics.Generated.cs
+//
+// This file is part of ILGPU and is
distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ include file="../../Static/TypeInformation.ttinclude" #>
+<#@ assembly name="System.Core" #>
+<#@ import namespace="System.Linq" #>
+<#@ import namespace="System.Text" #>
+<#@ import namespace="System.Collections.Generic" #>
+<#@ output extension=".cs" #>
+using ILGPU.IR.Intrinsics;
+using ILGPU.IR.Values;
+using ILGPU.Runtime.Cuda;
+using System.Runtime.CompilerServices;
+
+namespace ILGPU.Backends.Velocity
+{
+    partial class VelocityIntrinsics
+    {
+        #region Warp Shuffles
+
+        /// <summary>
+        /// Registers 64-bit warp and sub-warp shuffle redirects with the given manager.
+        /// </summary>
+        /// <param name="manager">The target implementation manager.</param>
+        private static void RegisterWarpShuffles(IntrinsicImplementationManager manager)
+        {
+<# foreach (var (operation, kind) in ShuffleOperations) { #>
+            manager.RegisterWarpShuffle(
+                ShuffleKind.<#= kind #>,
+                BasicValueType.Int64,
+                CreateIntrinsic(
+                    nameof(Warp<#= operation #>Int64),
+                    IntrinsicImplementationMode.Redirect));
+            manager.RegisterWarpShuffle(
+                ShuffleKind.<#= kind #>,
+                BasicValueType.Float64,
+                CreateIntrinsic(
+                    nameof(Warp<#= operation #>Float64),
+                    IntrinsicImplementationMode.Redirect));
+
+            manager.RegisterSubWarpShuffle(
+                ShuffleKind.<#= kind #>,
+                BasicValueType.Int64,
+                CreateIntrinsic(
+                    nameof(SubWarp<#= operation #>Int64), // width-aware wrapper
+                    IntrinsicImplementationMode.Redirect));
+            manager.RegisterSubWarpShuffle(
+                ShuffleKind.<#= kind #>,
+                BasicValueType.Float64,
+                CreateIntrinsic(
+                    nameof(SubWarp<#= operation #>Float64), // width-aware wrapper
+                    IntrinsicImplementationMode.Redirect));
+
+<# } #>
+        }
+
+<# foreach (var (operation, _) in ShuffleOperations) { #>
+        ///
+        /// Wraps a single warp-shuffle operation.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Warp<#= operation #>Int64(ulong value, int idx)
+        {
+            var parts = IntrinsicMath.Decompose(value);
+            parts.Lower = Warp.<#= operation #>(parts.Lower, idx);
+            parts.Upper = Warp.<#= operation #>(parts.Upper, idx);
+            return parts.ToULong();
+        }
+
+        /// <summary>
+        /// Wraps a warp shuffle of a double by reusing the 64-bit integer wrapper.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static double Warp<#= operation #>Float64(double value, int idx)
+        {
+            var shuffled = Warp<#= operation #>Int64(Interop.FloatAsInt(value), idx);
+            return Interop.IntAsFloat(shuffled);
+        }
+
+        /// <summary>
+        /// Wraps a sub-warp shuffle by shuffling the lower and upper parts separately.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong SubWarp<#= operation #>Int64(ulong value, int idx, int width)
+        {
+            var parts = IntrinsicMath.Decompose(value);
+            parts.Lower = Warp.<#= operation #>(parts.Lower, idx, width);
+            parts.Upper = Warp.<#= operation #>(parts.Upper, idx, width);
+            return parts.ToULong();
+        }
+
+        ///
+        /// Wraps a single sub-warp-shuffle operation.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static double SubWarp<#= operation #>Float64(
+            double value,
+            int idx,
+            int width)
+        {
+            var shuffled = SubWarp<#= operation #>Int64(
+                Interop.FloatAsInt(value),
+                idx,
+                width);
+            return Interop.IntAsFloat(shuffled);
+        }
+
+<# } #>
+
+        #endregion
+    }
+}
\ No newline at end of file
diff --git a/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.cs b/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.cs
new file mode 100644
index 000000000..9a197b8f5
--- /dev/null
+++ b/Src/ILGPU/Backends/Velocity/VelocityIntrinsics.cs
@@ -0,0 +1,79 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: VelocityIntrinsics.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.IR.Intrinsics;
+using ILGPU.IR.Values;
+using System.Runtime.CompilerServices;
+
+namespace ILGPU.Backends.Velocity
+{
+    /// <summary>
+    /// Implements and initializes Velocity intrinsics.
+    /// </summary>
+    static partial class VelocityIntrinsics
+    {
+        #region Specializers
+
+        /// <summary>
+        /// Creates a new Velocity intrinsic.
+        /// </summary>
+        /// <param name="name">The name of the intrinsic.</param>
+        /// <param name="mode">The implementation mode.</param>
+        /// <returns>The created intrinsic.</returns>
+        private static VelocityIntrinsic CreateIntrinsic(
+            string name,
+            IntrinsicImplementationMode mode) =>
+            new(typeof(VelocityIntrinsics), name, mode);
+
+        /// <summary>
+        /// Registers all Velocity intrinsics with the given manager.
+        /// </summary>
+        /// <param name="manager">The target implementation manager.</param>
+        public static void Register(IntrinsicImplementationManager manager)
+        {
+            RegisterBroadcasts(manager);
+            RegisterWarpShuffles(manager);
+        }
+
+        #endregion
+
+        #region Broadcasts
+
+        /// <summary>
+        /// Registers all broadcast intrinsics with the given manager.
+        /// </summary>
+        /// <param name="manager">The target implementation manager.</param>
+        private static void RegisterBroadcasts(
+            IntrinsicImplementationManager manager)
+        {
+            manager.RegisterBroadcast(
+                BroadcastKind.GroupLevel,
+                CreateIntrinsic(
+                    nameof(GroupAndWarpBroadcast),
+                    IntrinsicImplementationMode.Redirect));
+            manager.RegisterBroadcast(
+                BroadcastKind.WarpLevel,
+                CreateIntrinsic(
+                    nameof(GroupAndWarpBroadcast),
+                    IntrinsicImplementationMode.Redirect));
+        }
+
+        /// <summary>
+        /// Wraps a single warp and group-broadcast operation via a warp shuffle.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static T GroupAndWarpBroadcast<T>(T value, int laneIndex)
+            where T : unmanaged =>
+            Warp.Shuffle(value, laneIndex);
+
+        #endregion
+    }
+}
From c6151f32588e2b339c09ec4e51c1b56ffe47fab8 Mon Sep 17 00:00:00 2001
From: Marcel Koester
Date: Sun, 1 Oct 2023 20:23:24 +0200
Subject: [PATCH 3/6] Added VelocityIntrinsics registration to ContextBuilder.

---
 Src/ILGPU/Context.Builder.cs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Src/ILGPU/Context.Builder.cs b/Src/ILGPU/Context.Builder.cs
index 28aaab010..3a3519ce2 100644
--- a/Src/ILGPU/Context.Builder.cs
+++ b/Src/ILGPU/Context.Builder.cs
@@ -1,6 +1,6 @@
 // ---------------------------------------------------------------------------------------
 // ILGPU
-// Copyright (c) 2021 ILGPU Project
+// Copyright (c) 2021-2023 ILGPU Project
 // www.ilgpu.net
 //
 // File: Context.Builder.cs
@@ -11,6 +11,7 @@
 
 using ILGPU.Backends.OpenCL;
 using ILGPU.Backends.PTX;
+using ILGPU.Backends.Velocity;
 using ILGPU.IR.Intrinsics;
 using ILGPU.Resources;
 using ILGPU.Runtime;
@@ -49,6 +50,7 @@ internal Builder()
                 // Register intrinsics
                 PTXIntrinsics.Register(IntrinsicManager);
                 CLIntrinsics.Register(IntrinsicManager);
+                VelocityIntrinsics.Register(IntrinsicManager);
             }
 
             #endregion
From 6220bc302a5824a8d9261b5e164050b2da904d97 Mon Sep 17 00:00:00 2001
From: Marcel Koester
Date: Sun, 1 Oct 2023 20:23:24 +0200
Subject: [PATCH 4/6] Removed 64bit shuffles and broadcasts from
code generators. --- .../Velocity/VelocityCodeGenerator.Threads.cs | 81 +++++-------------- .../Velocity/VelocityTargetSpecializer.cs | 36 --------- 2 files changed, 22 insertions(+), 95 deletions(-) diff --git a/Src/ILGPU/Backends/Velocity/VelocityCodeGenerator.Threads.cs b/Src/ILGPU/Backends/Velocity/VelocityCodeGenerator.Threads.cs index 7f3aa854a..552db02be 100644 --- a/Src/ILGPU/Backends/Velocity/VelocityCodeGenerator.Threads.cs +++ b/Src/ILGPU/Backends/Velocity/VelocityCodeGenerator.Threads.cs @@ -152,26 +152,18 @@ public void GenerateCode(PredicateBarrier barrier) Load(barrier.Predicate); // Load and call predicate operation - bool is32Bit = barrier.IsTreatedAs32Bit(); + if (!barrier.IsTreatedAs32Bit()) + throw new InternalCompilerException(); switch (barrier.Kind) { case PredicateBarrierKind.PopCount: - if (is32Bit) - Specializer.BarrierPopCount32(Emitter); - else - Specializer.BarrierPopCount64(Emitter); + Specializer.BarrierPopCount32(Emitter); break; case PredicateBarrierKind.And: - if (is32Bit) - Specializer.BarrierAnd32(Emitter); - else - Specializer.BarrierAnd64(Emitter); + Specializer.BarrierAnd32(Emitter); break; case PredicateBarrierKind.Or: - if (is32Bit) - Specializer.BarrierOr32(Emitter); - else - Specializer.BarrierOr64(Emitter); + Specializer.BarrierOr32(Emitter); break; default: throw new NotSupportedException(); @@ -185,21 +177,8 @@ public void GenerateCode(Barrier barrier) => Specializer.Barrier(Emitter); /// - public void GenerateCode(Broadcast broadcast) - { - // Load the source variable - Emitter.Emit(LocalOperation.Load, GetBlockMask(broadcast.BasicBlock)); - Load(broadcast.Variable); - Load(broadcast.Origin); - - // Get the appropriate broadcast operation - if (broadcast.IsTreatedAs32Bit()) - Specializer.Broadcast32(Emitter); - else - Specializer.Broadcast64(Emitter); - - Store(broadcast); - } + public void GenerateCode(Broadcast broadcast) => + throw new InternalCompilerException(); /// public void GenerateCode(WarpShuffle shuffle) @@ 
-209,33 +188,23 @@ public void GenerateCode(WarpShuffle shuffle) Load(shuffle.Variable); Load(shuffle.Origin); - // Get the appropriate broadcast operation - bool is32Bit = shuffle.IsTreatedAs32Bit(); + // Make sure we are compiling 32bit versions only + if (!shuffle.IsTreatedAs32Bit()) + throw new InternalCompilerException(); + switch (shuffle.Kind) { case ShuffleKind.Generic: - if (is32Bit) - Specializer.Shuffle32(Emitter); - else - Specializer.Shuffle64(Emitter); + Specializer.Shuffle32(Emitter); break; case ShuffleKind.Up: - if (is32Bit) - Specializer.ShuffleUp32(Emitter); - else - Specializer.ShuffleUp64(Emitter); + Specializer.ShuffleUp32(Emitter); break; case ShuffleKind.Down: - if (is32Bit) - Specializer.ShuffleDown32(Emitter); - else - Specializer.ShuffleDown64(Emitter); + Specializer.ShuffleDown32(Emitter); break; case ShuffleKind.Xor: - if (is32Bit) - Specializer.ShuffleXor32(Emitter); - else - Specializer.ShuffleXor64(Emitter); + Specializer.ShuffleXor32(Emitter); break; default: throw new NotSupportedException(); @@ -254,27 +223,21 @@ public void GenerateCode(SubWarpShuffle shuffle) Load(shuffle.Origin); Load(shuffle.Width); + // Make sure we are compiling 32bit versions only + if (!shuffle.IsTreatedAs32Bit()) + throw new InternalCompilerException(); + // Get the appropriate broadcast operation - bool is32Bit = shuffle.IsTreatedAs32Bit(); switch (shuffle.Kind) { case ShuffleKind.Up: - if (is32Bit) - Specializer.SubShuffleUp32(Emitter); - else - Specializer.SubShuffleUp64(Emitter); + Specializer.SubShuffleUp32(Emitter); break; case ShuffleKind.Down: - if (is32Bit) - Specializer.SubShuffleDown32(Emitter); - else - Specializer.SubShuffleDown64(Emitter); + Specializer.SubShuffleDown32(Emitter); break; case ShuffleKind.Xor: - if (is32Bit) - Specializer.SubShuffleXor32(Emitter); - else - Specializer.SubShuffleXor64(Emitter); + Specializer.SubShuffleXor32(Emitter); break; default: throw new NotSupportedException(); diff --git 
a/Src/ILGPU/Backends/Velocity/VelocityTargetSpecializer.cs b/Src/ILGPU/Backends/Velocity/VelocityTargetSpecializer.cs index e8cd73e95..7ab002f9c 100644 --- a/Src/ILGPU/Backends/Velocity/VelocityTargetSpecializer.cs +++ b/Src/ILGPU/Backends/Velocity/VelocityTargetSpecializer.cs @@ -366,69 +366,33 @@ public virtual void Barrier(TILEmitter emitter) public abstract void BarrierPopCount32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void BarrierPopCount64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void BarrierAnd32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void BarrierAnd64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void BarrierOr32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void BarrierOr64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - - public abstract void Broadcast32(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - - public abstract void Broadcast64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void Shuffle32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void Shuffle64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void ShuffleUp32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void ShuffleUp64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void SubShuffleUp32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void SubShuffleUp64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void ShuffleDown32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void ShuffleDown64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void SubShuffleDown32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public 
abstract void SubShuffleDown64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void ShuffleXor32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void ShuffleXor64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - public abstract void SubShuffleXor32(TILEmitter emitter) where TILEmitter : struct, IILEmitter; - public abstract void SubShuffleXor64(TILEmitter emitter) - where TILEmitter : struct, IILEmitter; - #endregion #region IO From eb88dc884d287701cfab12b44052503a7e174476 Mon Sep 17 00:00:00 2001 From: Marcel Koester Date: Sun, 1 Oct 2023 20:23:25 +0200 Subject: [PATCH 5/6] Adapted Scalar code driver. --- Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs | 36 -------------------- 1 file changed, 36 deletions(-) diff --git a/Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs b/Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs index 7074544f7..3362d9861 100644 --- a/Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs +++ b/Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs @@ -317,69 +317,33 @@ public override void Atomic64( public override void BarrierPopCount32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.BarrierPopCount32Method); - public override void BarrierPopCount64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.BarrierPopCount64Method); - public override void BarrierAnd32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.BarrierAnd32Method); - public override void BarrierAnd64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.BarrierAnd64Method); - public override void BarrierOr32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.BarrierOr32Method); - public override void BarrierOr64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.BarrierOr64Method); - - public override void Broadcast32(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.Broadcast32Method); - - public override void Broadcast64(TILEmitter emitter) => - 
emitter.EmitCall(ScalarOperations2.Broadcast64Method); - public override void Shuffle32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.Shuffle32Method); - public override void Shuffle64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.Shuffle64Method); - public override void ShuffleUp32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.ShuffleUp32Method); - public override void ShuffleUp64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.ShuffleUp64Method); - public override void SubShuffleUp32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.SubShuffleUp32Method); - public override void SubShuffleUp64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.SubShuffleUp64Method); - public override void ShuffleDown32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.ShuffleDown32Method); - public override void ShuffleDown64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.ShuffleDown64Method); - public override void SubShuffleDown32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.SubShuffleDown32Method); - public override void SubShuffleDown64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.SubShuffleDown64Method); - public override void ShuffleXor32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.ShuffleXor32Method); - public override void ShuffleXor64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.ShuffleXor64Method); - public override void SubShuffleXor32(TILEmitter emitter) => emitter.EmitCall(ScalarOperations2.SubShuffleXor32Method); - public override void SubShuffleXor64(TILEmitter emitter) => - emitter.EmitCall(ScalarOperations2.SubShuffleXor64Method); - #endregion #region IO From c4e3946094d4373850029fb5b922371d66badccd Mon Sep 17 00:00:00 2001 From: Marcel Koester Date: Sun, 1 Oct 2023 20:23:25 +0200 Subject: [PATCH 6/6] Adapted generated Scalar operations. 
--- .../Velocity/Scalar/ScalarOperations.tt | 183 ------------------ 1 file changed, 183 deletions(-) diff --git a/Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.tt b/Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.tt index ab98fdd53..4c86663d7 100644 --- a/Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.tt +++ b/Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.tt @@ -993,18 +993,6 @@ namespace ILGPU.Backends.Velocity.Scalar return FromScalarI32(count); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> BarrierPopCount64( - <#= warpType32 #> mask, - <#= warpType64 #> warp) - { - int count = 0; -<# for (int i = 1; i <= warpSize; ++i) { #> - count += mask.Item<#= i #> != 0 ? (warp.Item<#= i #> != 0 ? 1 : 0) : 0; -<# } #> - return FromScalarI64((long)count); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static <#= warpType32 #> BarrierAnd32( <#= warpType32 #> mask, @@ -1017,18 +1005,6 @@ namespace ILGPU.Backends.Velocity.Scalar return FromScalarI32(andMask); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> BarrierAnd64( - <#= warpType32 #> mask, - <#= warpType64 #> warp) - { - long andMask = 1; -<# for (int i = 1; i <= warpSize; ++i) { #> - andMask &= mask.Item<#= i #> != 0 ? warp.Item<#= i #> : 0; -<# } #> - return FromScalarI64(andMask); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static <#= warpType32 #> BarrierOr32( <#= warpType32 #> mask, @@ -1041,18 +1017,6 @@ namespace ILGPU.Backends.Velocity.Scalar return FromScalarI32(orMask); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> BarrierOr64( - <#= warpType32 #> mask, - <#= warpType64 #> warp) - { - long orMask = 0; -<# for (int i = 1; i <= warpSize; ++i) { #> - orMask |= mask.Item<#= i #> != 0 ? 
warp.Item<#= i #> : 0; -<# } #> - return FromScalarI64(orMask); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int GetShuffledLane32( <#= warpType32 #> value, @@ -1069,46 +1033,6 @@ namespace ILGPU.Backends.Velocity.Scalar } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType32 #> Broadcast32( - <#= warpType32 #> mask, - <#= warpType32 #> value, - <#= warpType32 #> sourceLane) - { - // Mask is unused at the moment - int sourceLaneIdx = sourceLane.Item1; - int result = GetShuffledLane32(value, sourceLaneIdx); - return Select32(mask, value, FromScalarI32(result)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static long GetShuffledLane64( - <#= warpType64 #> value, - int sourceLane) - { - switch (sourceLane) - { -<# for (int i = 0; i < warpSize - 1; ++i) { #> - case <#= i #>: - return value.Item<#= i + 1 #>; -<# } #> - default: - return value.Item<#= warpSize #>; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> Broadcast64( - <#= warpType32 #> mask, - <#= warpType64 #> value, - <#= warpType64 #> sourceLane) - { - // Mask is unused at the moment - int sourceLaneIdx = (int)sourceLane.Item1; - long result = GetShuffledLane64(value, sourceLaneIdx); - return Select64(mask, value, FromScalarI64(result)); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static <#= warpType32 #> Shuffle32( <#= warpType32 #> mask, @@ -1125,22 +1049,6 @@ namespace ILGPU.Backends.Velocity.Scalar return result; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> Shuffle64( - <#= warpType32 #> mask, - <#= warpType64 #> value, - <#= warpType64 #> sourceLanes) - { - // Mask is unused at the moment - Unsafe.SkipInit(out <#= warpType64 #> result); -<# for (int i = 1; i <= warpSize; ++i) { #> - result.Item<#= i #> = mask.Item<#= i #> != 0 - ? 
GetShuffledLane64(value, (int)sourceLanes.Item<#= i #>) - : value.Item<#= i #>; -<# } #> - return result; - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void ComputeShuffleConfig( <#= warpType32 #> width, @@ -1173,29 +1081,6 @@ namespace ILGPU.Backends.Velocity.Scalar return Shuffle32(mask, warp, AddI32(adjustedLane, offset)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> ShuffleUp64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> delta, - <#= warpType32 #> width) - { - var lane = SubI32(LoadLaneIndexVector32(), delta); - return Shuffle64(mask, warp, lane); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> SubShuffleUp64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> delta, - <#= warpType32 #> width) - { - ComputeShuffleConfig(width, out var lane, out var offset); - var adjustedLane = SubI32(lane, delta); - return Shuffle64(mask, warp, AddI32(adjustedLane, offset)); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static <#= warpType32 #> ShuffleDown32( <#= warpType32 #> mask, @@ -1218,28 +1103,6 @@ namespace ILGPU.Backends.Velocity.Scalar return Shuffle32(mask, warp, AddI32(adjustedLane, offset)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> ShuffleDown64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> delta) - { - var lane = AddI32(LoadLaneIndexVector32(), delta); - return Shuffle64(mask, warp, lane); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> SubShuffleDown64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> delta, - <#= warpType32 #> width) - { - ComputeShuffleConfig(width, out var lane, out var offset); - var adjustedLane = AddI32(lane, delta); - return Shuffle64(mask, warp, AddI32(adjustedLane, offset)); - } - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] internal static <#= warpType32 #> ShuffleXor32( <#= warpType32 #> mask, @@ -1262,72 +1125,26 @@ namespace ILGPU.Backends.Velocity.Scalar return Shuffle32(mask, warp, AddI32(adjustedLane, offset)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> ShuffleXor64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> laneMask) - { - var lane = XorU32(LoadLaneIndexVector32(), laneMask); - return Shuffle64(mask, warp, lane); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static <#= warpType64 #> SubShuffleXor64( - <#= warpType32 #> mask, - <#= warpType64 #> warp, - <#= warpType32 #> laneMask, - <#= warpType32 #> width) - { - ComputeShuffleConfig(width, out var lane, out var offset); - var adjustedLane = XorU32(lane, laneMask); - return Shuffle64(mask, warp, AddI32(adjustedLane, offset)); - } - public static readonly MethodInfo BarrierPopCount32Method = GetMethod(nameof(BarrierPopCount32)); - public static readonly MethodInfo BarrierPopCount64Method = - GetMethod(nameof(BarrierPopCount64)); public static readonly MethodInfo BarrierAnd32Method = GetMethod(nameof(BarrierAnd32)); - public static readonly MethodInfo BarrierAnd64Method = - GetMethod(nameof(BarrierAnd64)); public static readonly MethodInfo BarrierOr32Method = GetMethod(nameof(BarrierOr32)); - public static readonly MethodInfo BarrierOr64Method = - GetMethod(nameof(BarrierOr64)); - public static readonly MethodInfo Broadcast32Method = - GetMethod(nameof(Broadcast32)); - public static readonly MethodInfo Broadcast64Method = - GetMethod(nameof(Broadcast64)); public static readonly MethodInfo Shuffle32Method = GetMethod(nameof(Shuffle32)); - public static readonly MethodInfo Shuffle64Method = - GetMethod(nameof(Shuffle64)); public static readonly MethodInfo ShuffleUp32Method = GetMethod(nameof(ShuffleUp32)); public static readonly MethodInfo SubShuffleUp32Method = 
GetMethod(nameof(SubShuffleUp32)); - public static readonly MethodInfo ShuffleUp64Method = - GetMethod(nameof(ShuffleUp64)); - public static readonly MethodInfo SubShuffleUp64Method = - GetMethod(nameof(SubShuffleUp64)); public static readonly MethodInfo ShuffleDown32Method = GetMethod(nameof(ShuffleDown32)); public static readonly MethodInfo SubShuffleDown32Method = GetMethod(nameof(SubShuffleDown32)); - public static readonly MethodInfo ShuffleDown64Method = - GetMethod(nameof(ShuffleDown64)); - public static readonly MethodInfo SubShuffleDown64Method = - GetMethod(nameof(SubShuffleDown64)); public static readonly MethodInfo ShuffleXor32Method = GetMethod(nameof(ShuffleXor32)); public static readonly MethodInfo SubShuffleXor32Method = GetMethod(nameof(SubShuffleXor32)); - public static readonly MethodInfo ShuffleXor64Method = - GetMethod(nameof(ShuffleXor64)); - public static readonly MethodInfo SubShuffleXor64Method = - GetMethod(nameof(SubShuffleXor64)); #endregion