Skip to content

Commit

Permalink
Optimized PTX IntrinsicMath implementation to use LibDevice. (m4rs-mt…
Browse files Browse the repository at this point in the history
  • Loading branch information
MoFtZ committed Apr 18, 2024
1 parent 57a2d6c commit 1064c93
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 18 deletions.
13 changes: 9 additions & 4 deletions Src/ILGPU.Algorithms/PTX/PTXContext.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
// Copyright (c) 2019-2023 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXContext.cs
Expand Down Expand Up @@ -46,7 +46,8 @@ static partial class PTXContext
private static readonly PTXIntrinsic MathCodeGeneratorIntrinsic =
new PTXIntrinsic(
MathCodeGenerator,
IntrinsicImplementationMode.GenerateCode)
IntrinsicImplementationMode.GenerateCode,
libDeviceRequired: false)
.ThrowIfNull();

/// <summary>
Expand All @@ -70,7 +71,8 @@ private static PTXIntrinsic GetMathCodeGeneratorIntrinsic(
PTXMathType,
nameof(PTXMath.GenerateMathIntrinsic),
IntrinsicImplementationMode.GenerateCode,
minArchitecture);
minArchitecture,
libDeviceRequired: false);

/// <summary>
/// Resolves a PTX intrinsic for the given math-function configuration.
Expand All @@ -87,7 +89,10 @@ private static PTXIntrinsic GetMathIntrinsic(string name, params Type[] types)
types,
null)
.ThrowIfNull();
return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect);
return new PTXIntrinsic(
targetMethod,
IntrinsicImplementationMode.Redirect,
libDeviceRequired: false);
}

/// <summary>
Expand Down
97 changes: 95 additions & 2 deletions Src/ILGPU/Backends/PTX/PTXIntrinsic.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2019-2021 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsic.cs
Expand Down Expand Up @@ -52,6 +52,66 @@ public PTXIntrinsic(MethodInfo targetMethod, IntrinsicImplementationMode mode)
mode)
{ }

/// <summary>
/// Creates a PTX intrinsic that is limited to architectures that are at least as
/// new as <paramref name="minArchitecture"/>.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The implementation (code-generation) mode.</param>
/// <param name="minArchitecture">The lowest architecture to support.</param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture minArchitecture)
    : base(BackendType.PTX, targetMethod, mode) =>
    MinArchitecture = minArchitecture;

/// <summary>
/// Creates a PTX intrinsic with an optional lower and an optional upper
/// architecture bound.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The implementation (code-generation) mode.</param>
/// <param name="minArchitecture">
/// The lowest architecture to support (or null for no lower bound).
/// </param>
/// <param name="maxArchitecture">
/// The exclusive upper architecture bound (or null for no upper bound).
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture? minArchitecture,
    CudaArchitecture? maxArchitecture)
    : base(BackendType.PTX, targetMethod, mode)
{
    // Both bounds are stored as given; a missing value means "unbounded".
    MaxArchitecture = maxArchitecture;
    MinArchitecture = minArchitecture;
}

/// <summary>
/// Creates a PTX intrinsic without architecture bounds that carries an explicit
/// LibDevice requirement flag.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The implementation (code-generation) mode.</param>
/// <param name="libDeviceRequired">
/// True, if this intrinsic requires LibDevice.
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    bool libDeviceRequired)
    : base(BackendType.PTX, targetMethod, mode) =>
    LibDeviceRequired = libDeviceRequired;

/// <summary>
/// Constructs a new PTX intrinsic that can handle all architectures.
/// </summary>
Expand Down Expand Up @@ -143,6 +203,32 @@ public PTXIntrinsic(
MaxArchitecture = maxArchitecture;
}

/// <summary>
/// Creates a PTX intrinsic that is implemented by a method of the given handler
/// type, limited by a minimum architecture and an explicit LibDevice requirement
/// flag.
/// </summary>
/// <param name="handlerType">The type that contains the handler method.</param>
/// <param name="methodName">The name of the handler method (or null).</param>
/// <param name="mode">The implementation (code-generation) mode.</param>
/// <param name="minArchitecture">The lowest architecture to support.</param>
/// <param name="libDeviceRequired">
/// True, if this intrinsic requires LibDevice.
/// </param>
public PTXIntrinsic(
    Type handlerType,
    string methodName,
    IntrinsicImplementationMode mode,
    CudaArchitecture minArchitecture,
    bool libDeviceRequired)
    : base(BackendType.PTX, handlerType, methodName, mode)
{
    LibDeviceRequired = libDeviceRequired;
    MinArchitecture = minArchitecture;
}

#endregion

#region Properties
Expand All @@ -164,6 +250,11 @@ public PTXIntrinsic(
/// </remarks>
public CudaArchitecture? MaxArchitecture { get; }

/// <summary>
/// Returns whether LibDevice is required to use this intrinsic.
/// </summary>
/// <remarks>
/// If no value is set, the compatibility check of this intrinsic does not take
/// this property into account. If a value is set, the compatibility check
/// compares it against whether the NvvmAPI of the PTX backend is available:
/// true requires the API to be present, false requires it to be absent.
/// </remarks>
public bool? LibDeviceRequired { get; }

#endregion

#region Methods
Expand All @@ -174,7 +265,9 @@ backend is PTXBackend ptxBackend
&& (!MinArchitecture.HasValue ||
ptxBackend.Architecture >= MinArchitecture.Value)
&& (!MaxArchitecture.HasValue ||
ptxBackend.Architecture < MaxArchitecture.Value);
ptxBackend.Architecture < MaxArchitecture.Value)
&& (!LibDeviceRequired.HasValue ||
ptxBackend.NvvmAPI != null == LibDeviceRequired.Value);

#endregion
}
Expand Down
95 changes: 94 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2016-2021 ILGPU Project
// Copyright (c) 2016-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsics.Generated.tt/PTXIntrinsics.Generated.cs
Expand Down Expand Up @@ -35,6 +35,61 @@ var fp16Ops = new (string, string, string, string)[]

("Ternary", "MultiplyAdd", "FmaFP32", "SM_53"),
};

// Unary math functions that are redirected to LibDevice methods.
// Each entry consists of:
//  - the name of the UnaryArithmeticKind member to register,
//  - the name of the LibDevice method to redirect to,
//  - the type information of the operand (also used as the parameter type for
//    the method lookup).
// The first group registers all functions for FloatTypes[2], the second group
// registers the same functions for FloatTypes[1].
// NOTE(review): presumably FloatTypes[2] is double and FloatTypes[1] is float;
// confirm against the FloatTypes definition.
var unaryMathFunctions = new (string, string, TypeInformation)[]
{
("AcosF", "Acos", FloatTypes[2]),
("AsinF", "Asin", FloatTypes[2]),
("AtanF", "Atan", FloatTypes[2]),
("CeilingF", "Ceil", FloatTypes[2]),
("CosF", "Cos", FloatTypes[2]),
("CoshF", "Cosh", FloatTypes[2]),
("ExpF", "Exp", FloatTypes[2]),
("Exp2F", "Exp2", FloatTypes[2]),
("FloorF", "Floor", FloatTypes[2]),
("LogF", "Log", FloatTypes[2]),
("Log2F", "Log2", FloatTypes[2]),
("Log10F", "Log10", FloatTypes[2]),
("RsqrtF", "Rsqrt", FloatTypes[2]),
("SinF", "Sin", FloatTypes[2]),
("SinhF", "Sinh", FloatTypes[2]),
("SqrtF", "Sqrt", FloatTypes[2]),
("TanF", "Tan", FloatTypes[2]),
("TanhF", "Tanh", FloatTypes[2]),

("AcosF", "Acos", FloatTypes[1]),
("AsinF", "Asin", FloatTypes[1]),
("AtanF", "Atan", FloatTypes[1]),
("CeilingF", "Ceil", FloatTypes[1]),
("CosF", "Cos", FloatTypes[1]),
("CoshF", "Cosh", FloatTypes[1]),
("ExpF", "Exp", FloatTypes[1]),
("Exp2F", "Exp2", FloatTypes[1]),
("FloorF", "Floor", FloatTypes[1]),
("LogF", "Log", FloatTypes[1]),
("Log2F", "Log2", FloatTypes[1]),
("Log10F", "Log10", FloatTypes[1]),
("RsqrtF", "Rsqrt", FloatTypes[1]),
("SinF", "Sin", FloatTypes[1]),
("SinhF", "Sinh", FloatTypes[1]),
("SqrtF", "Sqrt", FloatTypes[1]),
("TanF", "Tan", FloatTypes[1]),
("TanhF", "Tanh", FloatTypes[1]),
};

// Binary math functions. Each entry consists of:
//  - the name of the BinaryArithmeticKind member to register,
//  - the name of the target method,
//  - the type that contains the target method (null selects LibDevice),
//  - the type information of both operands.
// The target method is looked up by its name and two parameters of the operand
// type. Therefore, the method name has to refer to a two-argument method:
// Atan2F has to be mapped to "Atan2", since "Atan" is the single-argument
// arctangent that is already used for the unary AtanF registration.
var binaryMathFunctions = new (string, string, string, TypeInformation)[]
{
    ("Atan2F", "Atan2", null, FloatTypes[2]),
    ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[2]),
    ("PowF", "Pow", null, FloatTypes[2]),
    ("Rem", "Fmod", null, FloatTypes[2]),

    ("Atan2F", "Atan2", null, FloatTypes[1]),
    ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[1]),
    ("PowF", "Pow", null, FloatTypes[1]),
    ("Rem", "Fmod", null, FloatTypes[1]),
};

#>
using ILGPU.IR.Intrinsics;
using ILGPU.IR.Values;
Expand Down Expand Up @@ -157,5 +212,43 @@ namespace ILGPU.Backends.PTX
}

#endregion

#region Math

/// <summary>
/// Registers all math intrinsics with the given manager.
/// </summary>
/// <param name="manager">The target implementation manager.</param>
/// <remarks>
/// This method is generated from the unary and binary math function tables of
/// the text template. All unary functions and all binary functions without an
/// explicit base class are registered via CreateLibDeviceMathIntrinsic. Binary
/// functions with an explicit base class are registered via CreateMathIntrinsic
/// using a method of that class.
/// </remarks>
private static void RegisterMathFunctions(IntrinsicImplementationManager manager)
{
<# foreach (var (kind, methodName, type) in unaryMathFunctions) { #>
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
CreateLibDeviceMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>)));
<# } #>

<# foreach (var (kind, methodName, baseClass, type) in binaryMathFunctions) { #>
manager.RegisterBinaryArithmetic(
BinaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
<# if (baseClass == null) { #>
CreateLibDeviceMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } else { #>
CreateMathIntrinsic(
typeof(<#= baseClass #>),
nameof(<#= baseClass #>.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } #>
<# } #>
}

#endregion
}
}
50 changes: 49 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsics.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2019-2021 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsics.cs
Expand All @@ -13,7 +13,9 @@
using ILGPU.IR.Intrinsics;
using ILGPU.IR.Values;
using ILGPU.Runtime.Cuda;
using ILGPU.Util;
using System;
using System.Reflection;
using System.Runtime.CompilerServices;

namespace ILGPU.Backends.PTX
Expand Down Expand Up @@ -84,6 +86,51 @@ private static PTXIntrinsic CreateFP16Intrinsic(
maxArchitecture.Value)
: new PTXIntrinsic(HalfType, name, IntrinsicImplementationMode.Redirect);

/// <summary>
/// Looks up a public static <see cref="LibDevice"/> method and wraps it into a
/// redirecting PTX intrinsic that is flagged as requiring LibDevice.
/// </summary>
/// <param name="name">The name of the LibDevice method.</param>
/// <param name="types">The parameter types of the method to look up.</param>
/// <returns>The resolved intrinsic representation.</returns>
private static PTXIntrinsic CreateLibDeviceMathIntrinsic(
    string name,
    params Type[] types)
{
    const BindingFlags PublicStatic = BindingFlags.Static | BindingFlags.Public;

    // The lookup result is validated via ThrowIfNull before it is used.
    var libDeviceMethod = typeof(LibDevice)
        .GetMethod(name, PublicStatic, null, types, null)
        .ThrowIfNull();

    return new PTXIntrinsic(
        libDeviceMethod,
        IntrinsicImplementationMode.Redirect,
        libDeviceRequired: true);
}

/// <summary>
/// Looks up a public static method of the given type and wraps it into a
/// redirecting PTX intrinsic without a LibDevice requirement flag.
/// </summary>
/// <param name="baseType">The type that contains the target method.</param>
/// <param name="name">The name of the target method.</param>
/// <param name="types">The parameter types of the method to look up.</param>
/// <returns>The resolved intrinsic representation.</returns>
private static PTXIntrinsic CreateMathIntrinsic(
    Type baseType,
    string name,
    params Type[] types)
{
    const BindingFlags PublicStatic = BindingFlags.Static | BindingFlags.Public;

    // The lookup result is validated via ThrowIfNull before it is used.
    var redirectTarget = baseType
        .GetMethod(name, PublicStatic, null, types, null)
        .ThrowIfNull();

    return new PTXIntrinsic(redirectTarget, IntrinsicImplementationMode.Redirect);
}

/// <summary>
/// Registers all PTX intrinsics with the given manager.
/// </summary>
Expand All @@ -95,6 +142,7 @@ public static void Register(IntrinsicImplementationManager manager)
RegisterWarpShuffles(manager);
RegisterFP16(manager);
RegisterBitFunctions(manager);
RegisterMathFunctions(manager);
}

#endregion
Expand Down
Loading

0 comments on commit 1064c93

Please sign in to comment.