Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for Velocity intrinsics and simplified code generator. #1102

Merged
merged 6 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.cs
Src/ILGPU/Backends/PTX/PTXLibDeviceMethods.cs
Src/ILGPU/Backends/PTX/PTXLibDeviceNvvm.cs
Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.cs
Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.cs
Src/ILGPU/Frontend/Intrinsic/RemappedIntrinsics.Generated.cs
Src/ILGPU/HalfConversion.cs
Src/ILGPU/IR/Construction/ArithmeticOperations.cs
Expand Down
36 changes: 0 additions & 36 deletions Src/ILGPU/Backends/Velocity/Scalar/Scalar.cs
Original file line number Diff line number Diff line change
Expand Up @@ -317,69 +317,33 @@ public override void Atomic64<TILEmitter>(
/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierPopCount32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierPopCount32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierPopCount32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierPopCount64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierPopCount64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierPopCount64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierAnd32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierAnd32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierAnd32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierAnd64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierAnd64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierAnd64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierOr32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierOr32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierOr32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.BarrierOr64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void BarrierOr64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.BarrierOr64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.Broadcast32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void Broadcast32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.Broadcast32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.Broadcast64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void Broadcast64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.Broadcast64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.Shuffle32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void Shuffle32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.Shuffle32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.Shuffle64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void Shuffle64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.Shuffle64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleUp32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleUp32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleUp32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleUp64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleUp64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleUp64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleUp32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleUp32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleUp32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleUp64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleUp64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleUp64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleDown32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleDown32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleDown32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleDown64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleDown64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleDown64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleDown32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleDown32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleDown32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleDown64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleDown64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleDown64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleXor32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleXor32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleXor32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.ShuffleXor64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void ShuffleXor64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.ShuffleXor64Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleXor32Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleXor32<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleXor32Method);
}

/// <summary>
/// Emits a call to the method stored in
/// <c>ScalarOperations2.SubShuffleXor64Method</c>.
/// </summary>
/// <param name="emitter">The emitter used to emit the call.</param>
public override void SubShuffleXor64<TILEmitter>(TILEmitter emitter)
{
    emitter.EmitCall(ScalarOperations2.SubShuffleXor64Method);
}

#endregion

#region IO
Expand Down
183 changes: 0 additions & 183 deletions Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.tt
Original file line number Diff line number Diff line change
Expand Up @@ -993,18 +993,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return FromScalarI32(count);
}

/// <summary>
/// Counts the lanes whose mask entry and warp entry are both non-zero.
/// </summary>
/// <param name="mask">Per-lane mask; a lane with a zero entry contributes 0.</param>
/// <param name="warp">Per-lane values; a non-zero entry contributes 1.</param>
/// <returns>
/// The count, widened to long and passed through FromScalarI64.
/// NOTE(review): FromScalarI64 is defined outside this view; presumably it
/// replicates the scalar across all lanes - confirm.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> BarrierPopCount64(
<#= warpType32 #> mask,
<#= warpType64 #> warp)
{
int count = 0;
// The template loop below unrolls into one accumulation per lane (1..warpSize).
<# for (int i = 1; i <= warpSize; ++i) { #>
count += mask.Item<#= i #> != 0 ? (warp.Item<#= i #> != 0 ? 1 : 0) : 0;
<# } #>
return FromScalarI64((long)count);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> BarrierAnd32(
<#= warpType32 #> mask,
Expand All @@ -1017,18 +1005,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return FromScalarI32(andMask);
}

/// <summary>
/// AND-combines the per-lane values of <paramref name="warp"/> into a single
/// scalar, starting from an accumulator of 1.
/// </summary>
/// <param name="mask">Per-lane mask; a lane with a zero entry contributes 0.</param>
/// <param name="warp">Per-lane values that are AND-ed into the accumulator.</param>
/// <returns>The accumulator passed through FromScalarI64.</returns>
/// <remarks>
/// Because the accumulator starts at 1, only bit 0 of the lane values can
/// survive. A lane whose mask entry is zero contributes 0 and therefore forces
/// the result to 0. NOTE(review): confirm that masked-out lanes are meant to
/// clear the result rather than be ignored.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> BarrierAnd64(
<#= warpType32 #> mask,
<#= warpType64 #> warp)
{
long andMask = 1;
// The template loop below unrolls into one AND step per lane (1..warpSize).
<# for (int i = 1; i <= warpSize; ++i) { #>
andMask &= mask.Item<#= i #> != 0 ? warp.Item<#= i #> : 0;
<# } #>
return FromScalarI64(andMask);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> BarrierOr32(
<#= warpType32 #> mask,
Expand All @@ -1041,18 +1017,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return FromScalarI32(orMask);
}

/// <summary>
/// OR-combines the per-lane values of <paramref name="warp"/> for all lanes
/// whose mask entry is non-zero.
/// </summary>
/// <param name="mask">
/// Per-lane mask; a lane with a zero entry contributes 0 and so leaves the
/// accumulator unchanged.
/// </param>
/// <param name="warp">Per-lane values that are OR-ed into the accumulator.</param>
/// <returns>The accumulator passed through FromScalarI64.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> BarrierOr64(
<#= warpType32 #> mask,
<#= warpType64 #> warp)
{
long orMask = 0;
// The template loop below unrolls into one OR step per lane (1..warpSize).
<# for (int i = 1; i <= warpSize; ++i) { #>
orMask |= mask.Item<#= i #> != 0 ? warp.Item<#= i #> : 0;
<# } #>
return FromScalarI64(orMask);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int GetShuffledLane32(
<#= warpType32 #> value,
Expand All @@ -1069,46 +1033,6 @@ namespace ILGPU.Backends.Velocity.Scalar
}
}

/// <summary>
/// Reads the lane of <paramref name="value"/> selected by the first item of
/// <paramref name="sourceLane"/> and combines it with the original value.
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Select32.</param>
/// <param name="value">The per-lane values to read from.</param>
/// <param name="sourceLane">Only its first item is used as the source lane index.</param>
/// <returns>
/// The result of Select32 applied to the mask, the original value and the
/// selected lane value wrapped by FromScalarI32.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> Broadcast32(
    <#= warpType32 #> mask,
    <#= warpType32 #> value,
    <#= warpType32 #> sourceLane)
{
    int laneIndex = sourceLane.Item1;
    int laneValue = GetShuffledLane32(value, laneIndex);
    var broadcast = FromScalarI32(laneValue);
    return Select32(mask, value, broadcast);
}

/// <summary>
/// Returns the item of <paramref name="value"/> selected by a zero-based lane
/// index.
/// </summary>
/// <param name="value">The per-lane values to read from.</param>
/// <param name="sourceLane">Zero-based lane index; index i selects Item(i + 1).</param>
/// <returns>
/// The selected item. Index warpSize - 1 and every out-of-range index
/// (including negative values) fall through to the default case, which
/// returns the last item.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static long GetShuffledLane64(
<#= warpType64 #> value,
int sourceLane)
{
switch (sourceLane)
{
<# for (int i = 0; i < warpSize - 1; ++i) { #>
case <#= i #>:
return value.Item<#= i + 1 #>;
<# } #>
default:
return value.Item<#= warpSize #>;
}
}

/// <summary>
/// Reads the lane of <paramref name="value"/> selected by the first item of
/// <paramref name="sourceLane"/> and combines it with the original value.
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Select64.</param>
/// <param name="value">The per-lane values to read from.</param>
/// <param name="sourceLane">
/// Only its first item is used; it is cast to int to form the source lane index.
/// </param>
/// <returns>
/// The result of Select64 applied to the mask, the original value and the
/// selected lane value wrapped by FromScalarI64.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> Broadcast64(
    <#= warpType32 #> mask,
    <#= warpType64 #> value,
    <#= warpType64 #> sourceLane)
{
    int laneIndex = (int)sourceLane.Item1;
    long laneValue = GetShuffledLane64(value, laneIndex);
    var broadcast = FromScalarI64(laneValue);
    return Select64(mask, value, broadcast);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> Shuffle32(
<#= warpType32 #> mask,
Expand All @@ -1125,22 +1049,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return result;
}

/// <summary>
/// Builds a result in which each lane either reads the value of another lane
/// or keeps its own value, depending on the mask.
/// </summary>
/// <param name="mask">
/// Per-lane mask; a non-zero entry makes the lane read from its source lane,
/// a zero entry makes it keep its own value.
/// </param>
/// <param name="value">The per-lane values to shuffle.</param>
/// <param name="sourceLanes">
/// Per-lane source indices; each is cast to int and passed to GetShuffledLane64.
/// </param>
/// <returns>The per-lane result described above.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> Shuffle64(
<#= warpType32 #> mask,
<#= warpType64 #> value,
<#= warpType64 #> sourceLanes)
{
// The mask selects, per lane, between the shuffled value and the lane's own value.
// SkipInit avoids initializing the result up front; the unrolled template loop
// below assigns every item from 1 to warpSize.
Unsafe.SkipInit(out <#= warpType64 #> result);
<# for (int i = 1; i <= warpSize; ++i) { #>
result.Item<#= i #> = mask.Item<#= i #> != 0
? GetShuffledLane64(value, (int)sourceLanes.Item<#= i #>)
: value.Item<#= i #>;
<# } #>
return result;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void ComputeShuffleConfig(
<#= warpType32 #> width,
Expand Down Expand Up @@ -1173,29 +1081,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return Shuffle32(mask, warp, AddI32(adjustedLane, offset));
}

/// <summary>
/// Shuffles <paramref name="warp"/> using source lanes computed as
/// SubI32(LoadLaneIndexVector32(), delta).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="delta">Per-lane offset passed to SubI32 together with the lane indices.</param>
/// <param name="width">Not used by this implementation.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> ShuffleUp64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> delta,
    <#= warpType32 #> width)
{
    var laneIndices = LoadLaneIndexVector32();
    var sourceLanes = SubI32(laneIndices, delta);
    return Shuffle64(mask, warp, sourceLanes);
}

/// <summary>
/// Shuffles <paramref name="warp"/> using the lane and offset produced by
/// ComputeShuffleConfig for <paramref name="width"/>: the source lanes are
/// AddI32(SubI32(lane, delta), offset).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="delta">Per-lane offset passed to SubI32 together with the configured lane.</param>
/// <param name="width">Input to ComputeShuffleConfig.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> SubShuffleUp64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> delta,
    <#= warpType32 #> width)
{
    ComputeShuffleConfig(width, out var baseLane, out var laneOffset);
    var shiftedLane = SubI32(baseLane, delta);
    var sourceLanes = AddI32(shiftedLane, laneOffset);
    return Shuffle64(mask, warp, sourceLanes);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> ShuffleDown32(
<#= warpType32 #> mask,
Expand All @@ -1218,28 +1103,6 @@ namespace ILGPU.Backends.Velocity.Scalar
return Shuffle32(mask, warp, AddI32(adjustedLane, offset));
}

/// <summary>
/// Shuffles <paramref name="warp"/> using source lanes computed as
/// AddI32(LoadLaneIndexVector32(), delta).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="delta">Per-lane offset passed to AddI32 together with the lane indices.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> ShuffleDown64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> delta)
{
    var laneIndices = LoadLaneIndexVector32();
    var sourceLanes = AddI32(laneIndices, delta);
    return Shuffle64(mask, warp, sourceLanes);
}

/// <summary>
/// Shuffles <paramref name="warp"/> using the lane and offset produced by
/// ComputeShuffleConfig for <paramref name="width"/>: the source lanes are
/// AddI32(AddI32(lane, delta), offset).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="delta">Per-lane offset passed to AddI32 together with the configured lane.</param>
/// <param name="width">Input to ComputeShuffleConfig.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> SubShuffleDown64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> delta,
    <#= warpType32 #> width)
{
    ComputeShuffleConfig(width, out var baseLane, out var laneOffset);
    var shiftedLane = AddI32(baseLane, delta);
    var sourceLanes = AddI32(shiftedLane, laneOffset);
    return Shuffle64(mask, warp, sourceLanes);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType32 #> ShuffleXor32(
<#= warpType32 #> mask,
Expand All @@ -1262,72 +1125,26 @@ namespace ILGPU.Backends.Velocity.Scalar
return Shuffle32(mask, warp, AddI32(adjustedLane, offset));
}

/// <summary>
/// Shuffles <paramref name="warp"/> using source lanes computed as
/// XorU32(LoadLaneIndexVector32(), laneMask).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="laneMask">Per-lane value passed to XorU32 together with the lane indices.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> ShuffleXor64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> laneMask)
{
    var laneIndices = LoadLaneIndexVector32();
    var sourceLanes = XorU32(laneIndices, laneMask);
    return Shuffle64(mask, warp, sourceLanes);
}

/// <summary>
/// Shuffles <paramref name="warp"/> using the lane and offset produced by
/// ComputeShuffleConfig for <paramref name="width"/>: the source lanes are
/// AddI32(XorU32(lane, laneMask), offset).
/// </summary>
/// <param name="mask">Per-lane mask that is forwarded to Shuffle64.</param>
/// <param name="warp">The per-lane values to shuffle.</param>
/// <param name="laneMask">Per-lane value passed to XorU32 together with the configured lane.</param>
/// <param name="width">Input to ComputeShuffleConfig.</param>
/// <returns>The result of Shuffle64 for the computed source lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static <#= warpType64 #> SubShuffleXor64(
    <#= warpType32 #> mask,
    <#= warpType64 #> warp,
    <#= warpType32 #> laneMask,
    <#= warpType32 #> width)
{
    ComputeShuffleConfig(width, out var baseLane, out var laneOffset);
    var xoredLane = XorU32(baseLane, laneMask);
    var sourceLanes = AddI32(xoredLane, laneOffset);
    return Shuffle64(mask, warp, sourceLanes);
}

// MethodInfo handles for the barrier, broadcast and shuffle operations. Each
// field is initialized by passing the nameof of the corresponding operation to
// GetMethod. NOTE(review): GetMethod is defined outside this view; presumably
// it performs a reflection lookup by name on this class - confirm.
public static readonly MethodInfo BarrierPopCount32Method =
GetMethod(nameof(BarrierPopCount32));
public static readonly MethodInfo BarrierPopCount64Method =
GetMethod(nameof(BarrierPopCount64));
public static readonly MethodInfo BarrierAnd32Method =
GetMethod(nameof(BarrierAnd32));
public static readonly MethodInfo BarrierAnd64Method =
GetMethod(nameof(BarrierAnd64));
public static readonly MethodInfo BarrierOr32Method =
GetMethod(nameof(BarrierOr32));
public static readonly MethodInfo BarrierOr64Method =
GetMethod(nameof(BarrierOr64));
public static readonly MethodInfo Broadcast32Method =
GetMethod(nameof(Broadcast32));
public static readonly MethodInfo Broadcast64Method =
GetMethod(nameof(Broadcast64));
public static readonly MethodInfo Shuffle32Method =
GetMethod(nameof(Shuffle32));
public static readonly MethodInfo Shuffle64Method =
GetMethod(nameof(Shuffle64));
public static readonly MethodInfo ShuffleUp32Method =
GetMethod(nameof(ShuffleUp32));
public static readonly MethodInfo SubShuffleUp32Method =
GetMethod(nameof(SubShuffleUp32));
public static readonly MethodInfo ShuffleUp64Method =
GetMethod(nameof(ShuffleUp64));
public static readonly MethodInfo SubShuffleUp64Method =
GetMethod(nameof(SubShuffleUp64));
public static readonly MethodInfo ShuffleDown32Method =
GetMethod(nameof(ShuffleDown32));
public static readonly MethodInfo SubShuffleDown32Method =
GetMethod(nameof(SubShuffleDown32));
public static readonly MethodInfo ShuffleDown64Method =
GetMethod(nameof(ShuffleDown64));
public static readonly MethodInfo SubShuffleDown64Method =
GetMethod(nameof(SubShuffleDown64));
public static readonly MethodInfo ShuffleXor32Method =
GetMethod(nameof(ShuffleXor32));
public static readonly MethodInfo SubShuffleXor32Method =
GetMethod(nameof(SubShuffleXor32));
public static readonly MethodInfo ShuffleXor64Method =
GetMethod(nameof(ShuffleXor64));
public static readonly MethodInfo SubShuffleXor64Method =
GetMethod(nameof(SubShuffleXor64));

#endregion

Expand Down
Loading
Loading