diff --git a/Src/ILGPU.Algorithms/PTX/PTXContext.cs b/Src/ILGPU.Algorithms/PTX/PTXContext.cs index 61a268508..dfab0f341 100644 --- a/Src/ILGPU.Algorithms/PTX/PTXContext.cs +++ b/Src/ILGPU.Algorithms/PTX/PTXContext.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU Algorithms -// Copyright (c) 2019-2023 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXContext.cs @@ -46,7 +46,8 @@ static partial class PTXContext private static readonly PTXIntrinsic MathCodeGeneratorIntrinsic = new PTXIntrinsic( MathCodeGenerator, - IntrinsicImplementationMode.GenerateCode) + IntrinsicImplementationMode.GenerateCode, + libDeviceRequired: false) .ThrowIfNull(); /// @@ -70,7 +71,8 @@ private static PTXIntrinsic GetMathCodeGeneratorIntrinsic( PTXMathType, nameof(PTXMath.GenerateMathIntrinsic), IntrinsicImplementationMode.GenerateCode, - minArchitecture); + minArchitecture, + libDeviceRequired: false); /// /// Resolves a PTX intrinsic for the given math-function configuration. 
@@ -87,7 +89,10 @@ private static PTXIntrinsic GetMathIntrinsic(string name, params Type[] types) types, null) .ThrowIfNull(); - return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect); + return new PTXIntrinsic( + targetMethod, + IntrinsicImplementationMode.Redirect, + libDeviceRequired: false); } /// diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs b/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs index 6c6f32241..b1c03f2d0 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2019-2021 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsic.cs @@ -52,6 +52,66 @@ public PTXIntrinsic(MethodInfo targetMethod, IntrinsicImplementationMode mode) mode) { } + /// + /// Constructs a new PTX intrinsic that can handle all architectures + /// newer or equal to the given minimum architecture. + /// + /// The associated target method. + /// The code-generation mode. + /// The target/minimum architecture. + public PTXIntrinsic( + MethodInfo targetMethod, + IntrinsicImplementationMode mode, + CudaArchitecture minArchitecture) + : base( + BackendType.PTX, + targetMethod, + mode) + { + MinArchitecture = minArchitecture; + } + + /// + /// Constructs a new PTX intrinsic. + /// + /// The associated target method. + /// The code-generation mode. + /// The target/minimum architecture. + /// The max architecture (exclusive). + public PTXIntrinsic( + MethodInfo targetMethod, + IntrinsicImplementationMode mode, + CudaArchitecture? minArchitecture, + CudaArchitecture? maxArchitecture) + : base( + BackendType.PTX, + targetMethod, + mode) + { + MinArchitecture = minArchitecture; + MaxArchitecture = maxArchitecture; + } + + /// + /// Constructs a new PTX intrinsic. + /// + /// The associated target method. + /// The code-generator mode. + /// + /// Indicates whether LibDevice is required. 
+ /// + public PTXIntrinsic( + MethodInfo targetMethod, + IntrinsicImplementationMode mode, + bool libDeviceRequired) + : base( + BackendType.PTX, + targetMethod, + mode) + { + LibDeviceRequired = libDeviceRequired; + } + /// /// Constructs a new PTX intrinsic that can handle all architectures. /// @@ -143,6 +203,32 @@ public PTXIntrinsic( MaxArchitecture = maxArchitecture; } + /// + /// Constructs a new PTX intrinsic. + /// + /// The associated target handler type. + /// The target method name (or null). + /// The code-generator mode. + /// The target/minimum architecture. + /// + /// Indicates whether LibDevice is required. + /// + public PTXIntrinsic( + Type handlerType, + string methodName, + IntrinsicImplementationMode mode, + CudaArchitecture minArchitecture, + bool libDeviceRequired) + : base( + BackendType.PTX, + handlerType, + methodName, + mode) + { + MinArchitecture = minArchitecture; + LibDeviceRequired = libDeviceRequired; + } + #endregion #region Properties @@ -164,6 +250,11 @@ public PTXIntrinsic( /// public CudaArchitecture? MaxArchitecture { get; } + /// + /// Returns whether LibDevice is required to use this intrinsic. + /// + public bool? 
LibDeviceRequired { get; } + #endregion #region Methods @@ -174,7 +265,9 @@ backend is PTXBackend ptxBackend && (!MinArchitecture.HasValue || ptxBackend.Architecture >= MinArchitecture.Value) && (!MaxArchitecture.HasValue || - ptxBackend.Architecture < MaxArchitecture.Value); + ptxBackend.Architecture < MaxArchitecture.Value) + && (!LibDeviceRequired.HasValue || + ptxBackend.NvvmAPI != null == LibDeviceRequired.Value); #endregion } diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt b/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt index 800dacce2..8e0475641 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2016-2021 ILGPU Project +// Copyright (c) 2016-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsics.Generated.tt/PTXIntrinsics.Generated.cs @@ -35,6 +35,61 @@ var fp16Ops = new (string, string, string, string)[] ("Ternary", "MultiplyAdd", "FmaFP32", "SM_53"), }; + +var unaryMathFunctions = new (string, string, TypeInformation)[] +{ + ("AcosF", "Acos", FloatTypes[2]), + ("AsinF", "Asin", FloatTypes[2]), + ("AtanF", "Atan", FloatTypes[2]), + ("CeilingF", "Ceil", FloatTypes[2]), + ("CosF", "Cos", FloatTypes[2]), + ("CoshF", "Cosh", FloatTypes[2]), + ("ExpF", "Exp", FloatTypes[2]), + ("Exp2F", "Exp2", FloatTypes[2]), + ("FloorF", "Floor", FloatTypes[2]), + ("LogF", "Log", FloatTypes[2]), + ("Log2F", "Log2", FloatTypes[2]), + ("Log10F", "Log10", FloatTypes[2]), + ("RsqrtF", "Rsqrt", FloatTypes[2]), + ("SinF", "Sin", FloatTypes[2]), + ("SinhF", "Sinh", FloatTypes[2]), + ("SqrtF", "Sqrt", FloatTypes[2]), + ("TanF", "Tan", FloatTypes[2]), + ("TanhF", "Tanh", FloatTypes[2]), + + ("AcosF", "Acos", FloatTypes[1]), + ("AsinF", "Asin", FloatTypes[1]), + ("AtanF", "Atan", FloatTypes[1]), + ("CeilingF", "Ceil", FloatTypes[1]), + ("CosF", "Cos", 
FloatTypes[1]), + ("CoshF", "Cosh", FloatTypes[1]), + ("ExpF", "Exp", FloatTypes[1]), + ("Exp2F", "Exp2", FloatTypes[1]), + ("FloorF", "Floor", FloatTypes[1]), + ("LogF", "Log", FloatTypes[1]), + ("Log2F", "Log2", FloatTypes[1]), + ("Log10F", "Log10", FloatTypes[1]), + ("RsqrtF", "Rsqrt", FloatTypes[1]), + ("SinF", "Sin", FloatTypes[1]), + ("SinhF", "Sinh", FloatTypes[1]), + ("SqrtF", "Sqrt", FloatTypes[1]), + ("TanF", "Tan", FloatTypes[1]), + ("TanhF", "Tanh", FloatTypes[1]), +}; + +var binaryMathFunctions = new (string, string, string, TypeInformation)[] +{ + ("Atan2F", "Atan", null, FloatTypes[2]), + ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[2]), + ("PowF", "Pow", null, FloatTypes[2]), + ("Rem", "Fmod", null, FloatTypes[2]), + + ("Atan2F", "Atan", null, FloatTypes[1]), + ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[1]), + ("PowF", "Pow", null, FloatTypes[1]), + ("Rem", "Fmod", null, FloatTypes[1]), +}; + #> using ILGPU.IR.Intrinsics; using ILGPU.IR.Values; @@ -157,5 +212,43 @@ namespace ILGPU.Backends.PTX } #endregion + + #region Math + + /// + /// Registers all Math intrinsics with the given manager. + /// + /// The target implementation manager. 
+ private static void RegisterMathFunctions(IntrinsicImplementationManager manager) + { +<# foreach (var (kind, methodName, type) in unaryMathFunctions) { #> + manager.RegisterUnaryArithmetic( + UnaryArithmeticKind.<#= kind #>, + BasicValueType.<#= type.GetBasicValueType() #>, + CreateLibDeviceMathIntrinsic( + nameof(LibDevice.<#= methodName #>), + typeof(<#= type.Type #>))); +<# } #> + +<# foreach (var (kind, methodName, baseClass, type) in binaryMathFunctions) { #> + manager.RegisterBinaryArithmetic( + BinaryArithmeticKind.<#= kind #>, + BasicValueType.<#= type.GetBasicValueType() #>, + <# if (baseClass == null) { #> + CreateLibDeviceMathIntrinsic( + nameof(LibDevice.<#= methodName #>), + typeof(<#= type.Type #>), + typeof(<#= type.Type #>))); + <# } else { #> + CreateMathIntrinsic( + typeof(<#= baseClass #>), + nameof(<#= baseClass #>.<#= methodName #>), + typeof(<#= type.Type #>), + typeof(<#= type.Type #>))); + <# } #> +<# } #> + } + + #endregion } } \ No newline at end of file diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs b/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs index 564d6e16a..e9f14ee37 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2019-2023 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsics.cs @@ -13,7 +13,9 @@ using ILGPU.IR.Intrinsics; using ILGPU.IR.Values; using ILGPU.Runtime.Cuda; +using ILGPU.Util; using System; +using System.Reflection; using System.Runtime.CompilerServices; namespace ILGPU.Backends.PTX @@ -84,6 +86,51 @@ private static PTXIntrinsic CreateFP16Intrinsic( maxArchitecture.Value) : new PTXIntrinsic(HalfType, name, IntrinsicImplementationMode.Redirect); + /// + /// Creates a PTX intrinsic for the given math function. + /// + /// The intrinsic name. + /// The parameter types. 
+ /// The resolved intrinsic representation. + private static PTXIntrinsic CreateLibDeviceMathIntrinsic( + string name, + params Type[] types) + { + var targetMethod = typeof(LibDevice).GetMethod( + name, + BindingFlags.Public | BindingFlags.Static, + null, + types, + null) + .ThrowIfNull(); + return new PTXIntrinsic( + targetMethod, + IntrinsicImplementationMode.Redirect, + libDeviceRequired: true); + } + + /// + /// Creates a PTX intrinsic for the given math function. + /// + /// The source type containing the intrinsic. + /// The intrinsic name. + /// The parameter types. + /// The resolved intrinsic representation. + private static PTXIntrinsic CreateMathIntrinsic( + Type baseType, + string name, + params Type[] types) + { + var targetMethod = baseType.GetMethod( + name, + BindingFlags.Public | BindingFlags.Static, + null, + types, + null) + .ThrowIfNull(); + return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect); + } + /// /// Registers all PTX intrinsics with the given manager. /// @@ -95,6 +142,7 @@ public static void Register(IntrinsicImplementationManager manager) RegisterWarpShuffles(manager); RegisterFP16(manager); RegisterBitFunctions(manager); + RegisterMathFunctions(manager); } #endregion diff --git a/Src/ILGPU/Context.Builder.cs b/Src/ILGPU/Context.Builder.cs index 036a0b056..ed42c6bd9 100644 --- a/Src/ILGPU/Context.Builder.cs +++ b/Src/ILGPU/Context.Builder.cs @@ -323,7 +323,16 @@ public Builder Profiling() /// Automatically detects the CUDA SDK location. /// /// The current builder instance. - public Builder LibDevice() + public Builder LibDevice() => + LibDevice(throwIfNotFound: true); + + /// + /// Turns on LibDevice support. + /// Automatically detects the CUDA SDK location. + /// + /// Determines error handling. + /// The current builder instance. + internal Builder LibDevice(bool throwIfNotFound) { // Find the CUDA installation path. 
var cudaEnvName = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) @@ -332,9 +341,11 @@ public Builder LibDevice() var cudaPath = Environment.GetEnvironmentVariable(cudaEnvName); if (string.IsNullOrEmpty(cudaPath)) { - throw new NotSupportedException(string.Format( + return throwIfNotFound + ? throw new NotSupportedException(string.Format( RuntimeErrorMessages.NotSupportedLibDeviceEnvironmentVariable, - cudaEnvName)); + cudaEnvName)) + : this; } var nvvmRoot = Path.Combine(cudaPath, "nvvm"); @@ -348,21 +359,33 @@ public Builder LibDevice() ? "nvvm64*.dll" : "libnvvm*.so"; var nvvmFiles = Directory.EnumerateFiles(nvvmBinDir, nvvmSearchPattern); - LibNvvmPath = nvvmFiles.FirstOrDefault() - ?? throw new NotSupportedException(string.Format( + var libNvvmPath = nvvmFiles.FirstOrDefault(); + if (libNvvmPath == null) + { + return throwIfNotFound + ? throw new NotSupportedException(string.Format( RuntimeErrorMessages.NotSupportedLibDeviceNotFoundNvvmDll, - nvvmBinDir)); + nvvmBinDir)) + : this; + } // Find the LibDevice Bitcode. var libDeviceDir = Path.Combine(nvvmRoot, "libdevice"); var libDeviceFiles = Directory.EnumerateFiles( libDeviceDir, "libdevice.*.bc"); - LibDevicePath = libDeviceFiles.FirstOrDefault() - ?? throw new NotSupportedException(string.Format( + var libDevicePath = libDeviceFiles.FirstOrDefault(); + if (libDevicePath == null) + { + return throwIfNotFound + ? 
throw new NotSupportedException(string.Format( RuntimeErrorMessages.NotSupportedLibDeviceNotFoundBitCode, - libDeviceDir)); + libDeviceDir)) + : this; + } + LibNvvmPath = libNvvmPath; + LibDevicePath = libDevicePath; return this; } diff --git a/Src/ILGPU/Runtime/Cuda/CudaContextExtensions.cs b/Src/ILGPU/Runtime/Cuda/CudaContextExtensions.cs index 78043b6ff..e028e14a8 100644 --- a/Src/ILGPU/Runtime/Cuda/CudaContextExtensions.cs +++ b/Src/ILGPU/Runtime/Cuda/CudaContextExtensions.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2021-2023 ILGPU Project +// Copyright (c) 2021-2024 ILGPU Project // www.ilgpu.net // // File: CudaContextExtensions.cs @@ -86,6 +86,10 @@ public static Context.Builder CudaInternal( Backend.RuntimePlatform)); } + // Silently enable automatic LibDevice detection, if not already configured. + if (builder.LibDevicePath == null && builder.LibNvvmPath == null) + builder.LibDevice(throwIfNotFound: false); + CudaDevice.GetDevices( configure, predicate,