From 88e8099a1290628ca601e996f50acca72ac0880d Mon Sep 17 00:00:00 2001 From: Marcel Koester Date: Sun, 30 Jul 2023 20:36:40 +0200 Subject: [PATCH] PR #1040. --- .../CPUMetaOptimizerTests.cs | 330 +++++++ .../OptimizationTests.cs | 286 +++++- .../CPU/MetaOptimizer.AdjustSOGPlayers.cs | 162 ++++ .../CPU/MetaOptimizer.Evaluator.cs | 390 ++++++++ .../CPU/MetaOptimizer.InitializePlayers.cs | 149 +++ .../CPU/MetaOptimizer.Instance.cs | 568 ++++++++++++ .../Optimization/CPU/MetaOptimizer.OGAndDG.cs | 192 ++++ .../CPU/MetaOptimizer.RawEvaluator.cs | 205 ++++ .../Optimization/CPU/MetaOptimizer.Scalar.cs | 100 ++ .../CPU/MetaOptimizer.UpdatePlayers.cs | 414 +++++++++ .../CPU/MetaOptimizer.Vectorized.cs | 117 +++ .../Optimization/CPU/MetaOptimizer.cs | 874 ++++++++++++++++++ .../Optimization/CPU/OptimizationFunction.cs | 142 +++ .../Optimization/CPU/PositionModifier.cs | 162 ++++ .../Optimization/IOptimizationFunction.cs | 38 +- Src/ILGPU/Util/ParallelCache.cs | 378 ++++++++ 16 files changed, 4485 insertions(+), 22 deletions(-) create mode 100644 Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs create mode 100644 
Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs create mode 100644 Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs create mode 100644 Src/ILGPU/Util/ParallelCache.cs diff --git a/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs b/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs new file mode 100644 index 0000000000..b19765fc4a --- /dev/null +++ b/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs @@ -0,0 +1,330 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: CPUMetaOptimizerTests.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Algorithms.Optimization.CPU; +using ILGPU.Algorithms.Random; +using System; +using System.Threading.Tasks; +using Xunit; + +#if NET7_0_OR_GREATER + +#pragma warning disable CA1034 // Do not nest types +#pragma warning disable CA1819 // Properties should not return arrays + +namespace ILGPU.Algorithms.Tests.CPU +{ + /// + /// Contains tests to verify the functionality of the CPU-specialized + /// class. 
+ /// + public class CPUMetaOptimizerTests + { + #region CPU Functions + + public interface IOptimizerTestFunction : + OptimizationTests.IPredefineTestFunction, + ICPUOptimizationFunction + { } + + public readonly record struct TestBreakFunction(float Goal) : + ICPUOptimizationBreakFunction + { + public bool Break(float evalType, int iteration) => + Math.Abs(evalType - Goal) < 1e-4f || iteration > 500; + } + + /// + /// Represents the Himmelblau function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct HimmelblauFunction : IOptimizerTestFunction + { + public float Evaluate(ReadOnlySpan position) => + OptimizationTests.HimmelblauFunction.Evaluate( + position[0], + position[1]); + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + + public float Result => + new OptimizationTests.HimmelblauFunction().Result; + public float[] LowerBounds => + new OptimizationTests.HimmelblauFunction().LowerBounds; + public float[] UpperBounds => + new OptimizationTests.HimmelblauFunction().UpperBounds; + } + + /// + /// Represents the Easom function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct EasomFunction : IOptimizerTestFunction + { + public float Evaluate(ReadOnlySpan position) => + OptimizationTests.EasomFunction.Evaluate( + position[0], + position[1]); + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + + public float Result => + new OptimizationTests.EasomFunction().Result; + public float[] LowerBounds => + new OptimizationTests.EasomFunction().LowerBounds; + public float[] UpperBounds => + new OptimizationTests.EasomFunction().UpperBounds; + } + /// + /// Represents the Shaffer function N4: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct ShafferFunction4 : IOptimizerTestFunction + { + public float Evaluate(ReadOnlySpan position) 
=> + OptimizationTests.ShafferFunction4.Evaluate( + position[0], + position[1]); + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + + public float Result => + new OptimizationTests.ShafferFunction4().Result; + public float[] LowerBounds => + new OptimizationTests.ShafferFunction4().LowerBounds; + public float[] UpperBounds => + new OptimizationTests.ShafferFunction4().UpperBounds; + } + + /// + /// Represents the Rosenbrock function constrained to a disk + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct RosenbrockDisk : IOptimizerTestFunction + { + public float Evaluate(ReadOnlySpan position) => + OptimizationTests.RosenbrockDisk.Evaluate( + position[0], + position[1]); + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + + public float Result => + new OptimizationTests.RosenbrockDisk().Result; + public float[] LowerBounds => + new OptimizationTests.RosenbrockDisk().LowerBounds; + public float[] UpperBounds => + new OptimizationTests.RosenbrockDisk().UpperBounds; + } + + /// + /// Represents the Gomez and Levy function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct GomezAndLevyFunction : IOptimizerTestFunction + { + public float Evaluate(ReadOnlySpan position) => + OptimizationTests.GomezAndLevyFunction.Evaluate( + position[0], + position[1]); + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + + public float Result => + new OptimizationTests.GomezAndLevyFunction().Result; + public float[] LowerBounds => + new OptimizationTests.GomezAndLevyFunction().LowerBounds; + public float[] UpperBounds => + new OptimizationTests.GomezAndLevyFunction().UpperBounds; + } + + #endregion + + #region MemberData + + public static TheoryData< + object, + object, + object, + object, + object> TestData => + new TheoryData< + object, + object, + object, + 
object, + object> + { + { new HimmelblauFunction(), 8192, 0.5f, 0.5f, 0.5f }, + { new EasomFunction(), 81920, 0.5f, 0.5f, 0.5f }, + { new ShafferFunction4(), 8192, 0.5f, 0.5f, 0.5f }, + { new RosenbrockDisk(), 8192, 0.5f, 0.5f, 0.5f }, + { new GomezAndLevyFunction(), 81920, 0.5f, 0.5f, 0.5f }, + }; + + #endregion + + [Theory] + [MemberData(nameof(TestData))] + public void MetaOptimizationScalar( + TObjective objective, + int numParticles, + float stepSizeDefensive, + float stepSizeOffensive, + float stepSizeOffensiveSOG) + where TObjective : struct, IOptimizerTestFunction + { + int numDimensions = objective.LowerBounds.Length; + var random = new System.Random(13377331); + + using var optimizer = MetaOptimizer.CreateScalar< + float, + float, + RandomRanges.RandomRangeFloatProvider>( + random, + numParticles, + numDimensions); + + optimizer.LowerBounds = objective.LowerBounds; + optimizer.UpperBounds = objective.UpperBounds; + + optimizer.DefensiveStepSize = stepSizeDefensive; + optimizer.OffensiveStepSize = stepSizeOffensive; + optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG; + + var breakFunction = new TestBreakFunction(objective.Result); + var result = optimizer.Optimize( + objective, + breakFunction, + float.MaxValue); + + // The actually achievable result is 1e-6. 
However, as the RNG gives us + // non-deterministic results due to parallel processing, we limit ourselves + // to 1e-2 to make sure that the result lies roughly in the same ballpark + // what we were expecting + Assert.True(Math.Abs(result.Result - objective.Result) < 1e-2f); + } + + [Theory] + [MemberData(nameof(TestData))] + public void MetaOptimizationVectorized( + TObjective objective, + int numParticles, + float stepSizeDefensive, + float stepSizeOffensive, + float stepSizeOffensiveSOG) + where TObjective : struct, IOptimizerTestFunction + { + int numDimensions = objective.LowerBounds.Length; + var random = new System.Random(13377331); + + using var optimizer = MetaOptimizer.CreateVectorized< + float, + float, + RandomRanges.RandomRangeFloatProvider>( + random, + numParticles, + numDimensions); + + optimizer.LowerBounds = objective.LowerBounds; + optimizer.UpperBounds = objective.UpperBounds; + + optimizer.DefensiveStepSize = stepSizeDefensive; + optimizer.OffensiveStepSize = stepSizeOffensive; + optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG; + + var breakFunction = new TestBreakFunction(objective.Result); + var result = optimizer.Optimize( + objective, + breakFunction, + float.MaxValue); + + // The actually achievable result is 1e-6. 
However, as the RNG gives us + // non-deterministic results due to parallel processing, we limit ourselves + // to 1e-2 to make sure that the result lies roughly in the same ballpark + // what we were expecting + Assert.True(Math.Abs(result.Result - objective.Result) < 1e-2f); + } + + [Theory] + [MemberData(nameof(TestData))] + public void MetaOptimizationScalarRaw( + TObjective objective, + int numParticles, + float stepSizeDefensive, + float stepSizeOffensive, + float stepSizeOffensiveSOG) + where TObjective : struct, IOptimizerTestFunction + { + int numDimensions = objective.LowerBounds.Length; + var random = new System.Random(13377331); + + using var optimizer = MetaOptimizer.CreateScalar< + float, + float, + RandomRanges.RandomRangeFloatProvider>( + random, + numParticles, + numDimensions); + + optimizer.LowerBounds = objective.LowerBounds; + optimizer.UpperBounds = objective.UpperBounds; + + optimizer.DefensiveStepSize = stepSizeDefensive; + optimizer.OffensiveStepSize = stepSizeOffensive; + optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG; + + void EvaluatePosition( + Memory allPositions, + Memory evaluations, + int _, + int numPaddedDimensions, + int __, + Stride2D.DenseY positionStride, + ParallelOptions options) + { + for (int i = 0; i < numParticles; ++i) + // Parallel.For(0, numParticles, options, i => + { + int offset = positionStride.ComputeElementIndex((i, 0)); + int endOffset = positionStride.ComputeElementIndex( + (i, numPaddedDimensions)); + var position = allPositions.Slice(offset, endOffset - offset); + var result = objective.Evaluate(position.Span); + evaluations.Span[i] = result; + } + } + + var breakFunction = new TestBreakFunction(objective.Result); + var result = optimizer.OptimizeRaw( + EvaluatePosition, + breakFunction.Break, + objective.CurrentIsBetter, + float.MaxValue); + + // The actually achievable result is 1e-6. 
However, as the RNG gives us + // non-deterministic results due to parallel processing, we limit ourselves + // to 1e-2 to make sure that the result lies roughly in the same ballpark + // what we were expecting + Assert.True(Math.Abs(result.Result - objective.Result) < 1e-2f); + } + } +} + +#pragma warning restore CA1819 +#pragma warning restore CA1034 + +#endif diff --git a/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs b/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs index 7ab8acbbe3..1d70ad9292 100644 --- a/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs +++ b/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs @@ -20,6 +20,7 @@ using ILGPU.Tests; using System.Linq; using System.Numerics; +using System.Runtime.CompilerServices; using Xunit; using Xunit.Abstractions; @@ -34,11 +35,18 @@ public abstract partial class OptimizationTests : TestBase protected OptimizationTests(ITestOutputHelper output, TestContext testContext) : base(output, testContext) { } - + #if NET7_0_OR_GREATER - + #region Objectives - + + public interface IPredefineTestFunction + { + float Result { get; } + float[] LowerBounds { get; } + float[] UpperBounds { get; } + } + public readonly record struct DistanceF32x2(float Constant) : IOptimizationFunction { @@ -60,7 +68,271 @@ public float Evaluate( public bool CurrentIsBetter(float current, float proposed) => current <= proposed; } - + + /// + /// Represents the Himmelblau function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct HimmelblauFunction : + IOptimizationFunction, + IPredefineTestFunction + { + private static readonly float[] GlobalLowerBounds = new float[] + { + -5.0f, -5.0f + }; + + private static readonly float[] GlobalUpperBounds = new float[] + { + 5.0f, 5.0f + }; + + /// + /// The optimal result. + /// + public const float GlobalResult = 0.0f; + + /// + /// Evaluates the Himmelblau function. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Evaluate(float x, float y) + { + float first = (x * x + y - 11); + float second = (x + y * y - 7); + return first * first + second * second; + } + + public float Result => GlobalResult; + public float[] LowerBounds => GlobalLowerBounds; + public float[] UpperBounds => GlobalUpperBounds; + + public float Evaluate( + LongIndex1D index, + Index1D dimension, + SingleVectorView positionView) + { + var first = positionView[0]; + return Evaluate(first.X, first.Y); + } + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + } + + /// + /// Represents the Easom function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct EasomFunction : + IOptimizationFunction, + IPredefineTestFunction + { + private static readonly float[] GlobalLowerBounds = new float[] + { + -100.0f, -100.0f + }; + + private static readonly float[] GlobalUpperBounds = new float[] + { + 100.0f, 100.0f + }; + + /// + /// The optimal result. + /// + public const float GlobalResult = -1.0f; + + public float Result => GlobalResult; + public float[] LowerBounds => GlobalLowerBounds; + public float[] UpperBounds => GlobalUpperBounds; + + /// + /// Evaluates the Easom function. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Evaluate(float x, float y) + { + float xPart = x - XMath.PI; + float yPart = y - XMath.PI; + return -XMath.Cos(x) * XMath.Cos(y) * + XMath.Exp(-(xPart * xPart + yPart * yPart)); + } + public float Evaluate( + LongIndex1D index, + Index1D dimension, + SingleVectorView positionView) + { + var first = positionView[0]; + return Evaluate(first.X, first.Y); + } + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + } + + /// + /// Represents the Shaffer function N4: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct ShafferFunction4 : + IOptimizationFunction, + IPredefineTestFunction + { + private static readonly float[] GlobalLowerBounds = new float[] + { + -100.0f, -100.0f + }; + + private static readonly float[] GlobalUpperBounds = new float[] + { + 100.0f, 100.0f + }; + + /// + /// The optimal result. + /// + public const float GlobalResult = 0.292579f; + + public float Result => GlobalResult; + public float[] LowerBounds => GlobalLowerBounds; + public float[] UpperBounds => GlobalUpperBounds; + + /// + /// Evaluates the Shaffer function. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Evaluate(float x, float y) + { + float cos = XMath.Cos(XMath.Sin(XMath.Abs(x * x - y * y))); + float nominator = cos * cos - 0.5f; + float denominator = 1 + 0.001f * (x * x + y * y); + return 0.5f + nominator / (denominator * denominator); + } + public float Evaluate( + LongIndex1D index, + Index1D dimension, + SingleVectorView positionView) + { + var first = positionView[0]; + return Evaluate(first.X, first.Y); + } + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + } + + /// + /// Represents the Rosenbrock function constrained to a disk + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct RosenbrockDisk : + IOptimizationFunction, + IPredefineTestFunction + { + private static readonly float[] GlobalLowerBounds = new float[] + { + -1.5f, -1.5f + }; + + private static readonly float[] GlobalUpperBounds = new float[] + { + 1.5f, 1.5f + }; + + /// + /// The optimal result. + /// + public const float GlobalResult = 0.0f; + + public float Result => GlobalResult; + public float[] LowerBounds => GlobalLowerBounds; + public float[] UpperBounds => GlobalUpperBounds; + + /// + /// Evaluates the constrained Rosenbrock function. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Evaluate(float x, float y) + { + float xMin = 1.0f - x; + float x2 = x * x; + float result = xMin * xMin + 100.0f * (y - x2) * (y - x2); + if (x * x + y * y <= 2.0f) + return result; + return float.MaxValue; + } + + public float Evaluate( + LongIndex1D index, + Index1D dimension, + SingleVectorView positionView) + { + var first = positionView[0]; + return Evaluate(first.X, first.Y); + } + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + } + + /// + /// Represents the Gomez and Levy function: + /// https://en.wikipedia.org/wiki/Test_functions_for_optimization + /// + public readonly record struct GomezAndLevyFunction : + IOptimizationFunction, + IPredefineTestFunction + { + private static readonly float[] GlobalLowerBounds = new float[] + { + -1.0f, -1.0f + }; + + private static readonly float[] GlobalUpperBounds = new float[] + { + 0.75f, 1.0f + }; + + /// + /// The optimal result. + /// + public const float GlobalResult = -1.031628453f; + + public float Result => GlobalResult; + public float[] LowerBounds => GlobalLowerBounds; + public float[] UpperBounds => GlobalUpperBounds; + + /// + /// Evaluates the constrained Gomez and Levy function. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Evaluate(float x, float y) + { + float x2 = x * x; + float x4 = x2 * x2; + float y2 = y * y; + float y4 = y2 * y2; + // Objective: 4x^2 - 2.1x^4 + x^6/3 + xy - 4y^2 + 4y^4 (the x^4 term is + // subtracted; GlobalResult is the minimum of exactly this polynomial) + float result = 4.0f * x2 - 2.1f * x4 + 1.0f / 3.0f * x4 * x2 + + x * y - 4.0f * y2 + 4.0f * y4; + // Constraint: -sin(4*pi*x) + 2*sin^2(2*pi*y) must stay below 1.5 + float sin = XMath.Sin(2.0f * XMath.PI * y); + float conditionValue = -XMath.Sin(4.0f * XMath.PI * x) + 2.0f * sin * sin; + return conditionValue < 1.5f ? 
result : float.MaxValue; + } + + public float Evaluate( + LongIndex1D index, + Index1D dimension, + SingleVectorView positionView) + { + var first = positionView[0]; + return Evaluate(first.X, first.Y); + } + + public bool CurrentIsBetter(float current, float proposed) => + current < proposed; + } + #endregion #region MemberData @@ -129,7 +401,7 @@ public static TheoryData< #endregion - [SkippableTheory()] + [SkippableTheory] [MemberData(nameof(TestData))] public void ParticleSwarmOptimization< TFunc, @@ -157,7 +429,7 @@ public void ParticleSwarmOptimization< Skip.If( Accelerator.AcceleratorType == AcceleratorType.CPU && optimizerConfig.NumIterations * optimizerConfig.NumParticles > 2048); - + const int Seed = 24404699; using var pso = new PSO< TNumericType, @@ -185,7 +457,7 @@ public void ParticleSwarmOptimization< best, optimizerConfig.NumIterations); stream.Synchronize(); - + // Check result Assert.True( result.Result - delta <= expected, diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs new file mode 100644 index 0000000000..80133ed814 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs @@ -0,0 +1,162 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.AdjustSOGPlayers.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// --------------------------------------------------------------------------------------- + +using ILGPU.Algorithms.Random; +using ILGPU.Util; +using System; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// A parallel processing state to adjust SOG-based information for all winning + /// offensive players from the current solver iteration. + /// + /// The random range provider type. + private class AdjustSOGPlayersState : InitializePlayersState + where TRandom : struct, IRandomRangeProvider + { + /// + /// Creates a new SOG players state. + /// + /// The random to use. + public AdjustSOGPlayersState(TRandom random) + : base(random) + { } + } + + /// + /// Updates all players according to defensive and offensive winners. + /// + /// The processor type being used. + /// The processing type. + /// The random range provider type. + private sealed class AdjustSOGPlayers : + ParallelProcessingCache< + AdjustSOGPlayersState, + AdjustSOGPlayers>, + IParallelProcessingBody> + where TProcessor : struct, IProcessor + where TType : unmanaged + where TRandom : struct, IRandomRangeProvider + { + private readonly MetaOptimizer parent; + private readonly Func, TRandom> getRandom; + + /// + /// Creates a new player update instance. + /// + /// The parent optimizer instance. + /// A function creating a new RNG instance. + public AdjustSOGPlayers( + MetaOptimizer instance, + Func, TRandom> createRandom) + { + parent = instance; + getRandom = createRandom; + } + + /// + /// Gets or sets the best known position vector. + /// + public ReadOnlyMemory BestPosition { get; set; } + + /// + /// Returns the current instance. + /// + protected override AdjustSOGPlayers + CreateBody() => this; + + /// + /// Creates an intermediate accumulation state. 
+ /// + protected override AdjustSOGPlayersState + CreateIntermediate() => new(getRandom(parent)); + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Initialize() { } + + /// + /// Adjusts all SOG-player positions from the current iteration while taking + /// SDG and best positions into account. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Process( + int index, + ParallelLoopState? _, + AdjustSOGPlayersState state) + { + // Load sog index and associated position vector + var offensiveIndex = parent.sogList[index]; + var offensive = parent + .GetNextPosition(offensiveIndex) + .CastUnsafe(); + + // Get two fresh random numbers + var r1 = state.Next(); + var r2 = state.Next(); + + // Get lower and upper bounds + var lowerBounds = parent.lowerBounds.AsSpan().CastUnsafe(); + var upperBounds = parent.upperBounds.AsSpan().CastUnsafe(); + + // Get best position and SDG + var bestPosition = BestPosition.Span.CastUnsafe(); + var sdg = parent.sdg.AsSpan().CastUnsafe(); + + // Create new processor for this step + var processor = TProcessor.New(); + for (int i = 0; i < offensive.Length; ++i) + { + // Get local offensive item ref + ref var offensiveVec = ref offensive.GetItemRef(i); + + // Compute new position and set new vector of offensive SOG player. + // All operands are read at the same element index i, so the SDG + // entry has to be indexed by i as well (not by a fixed element). + var xOffNew3 = processor.DetermineNewPosition( + offensiveVec, + bestPosition.GetItemRef(i), + sdg.GetItemRef(i), + r1, + r2, + parent.OffensiveSOGStepSize); + + // Clamp new offensive position and store result + var clamped = processor.Clamp( + lowerBounds.GetItemRef(i), + upperBounds.GetItemRef(i), + xOffNew3); + offensiveVec = clamped; + } + } + + /// + /// Does not perform any operation. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Finalize( + ReadOnlySpan> intermediateStates) + { } + + } + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs new file mode 100644 index 0000000000..18675bb69a --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs @@ -0,0 +1,390 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.Evaluator.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Util; +using System; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// A parallel evaluation state storing temporary best result and position + /// information per thread. + /// + /// + /// The optimization function type to use. + /// + /// + /// The intermediate state type for each optimization processing thread. + /// + private sealed class EvaluatorState : DisposeBase + where TFunction : + IBaseOptimizationFunction, + IParallelCache + where TIntermediate : class + { + private TFunction function; + private TEvalType bestKnownResult; + private readonly T[] bestPosition; + + /// + /// Creates a new evaluation state. + /// + /// + /// The optimization function to use. + /// + /// + /// The number of padded dimensions taking vector lengths into account. 
+ /// + public EvaluatorState(TFunction optimizationFunction, int numPaddedDimensions) + { + function = optimizationFunction; + bestPosition = new T[numPaddedDimensions]; + Intermediate = function.CreateIntermediate(); + } + + /// + /// Returns the intermediate state of this instance. + /// + public TIntermediate Intermediate { get; } + + /// + /// Resets the best known result to the given result value. + /// + /// The best result value to store. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Reset(TEvalType bestResult) + { + bestKnownResult = bestResult; + Array.Clear(bestPosition); + } + + /// + /// Merges the given result with the internally stored one. If the passed + /// result value is considered better than the stored one, the passed position + /// vector will be copied to the internally stored best position. + /// + /// The result value to merge. + /// + /// The position that led to the given result value. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void MergeWith(TEvalType result, ReadOnlySpan position) + { + if (function.CurrentIsBetter(bestKnownResult, result)) + return; + + bestKnownResult = result; + position.CopyTo(bestPosition); + } + + /// + /// Aggregates currently available information into the given result field. + /// If the objective function determines that the referenced result is worse + /// than the one stored internally, the referenced result value is updated + /// and the internally stored position is copied to the given result position + /// span. + /// + /// + /// A reference to the currently known best result. + /// + /// + /// A span pointing to the globally found best result position vector which + /// will be updated if the internally stored result value is considered + /// better than the referenced one. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AggregateInto(ref TEvalType result, Span resultPosition) + { + if (function.CurrentIsBetter(result, bestKnownResult)) + return; + + result = bestKnownResult; + bestPosition.CopyTo(resultPosition); + } + + /// + /// Disposes the intermediate state if required. + /// + protected override void Dispose(bool disposing) + { + if (Intermediate is IDisposable disposable) + disposable.Dispose(); + + base.Dispose(disposing); + } + } + + /// + /// Represents a result manager storing best result values. + /// + private struct ResultManager + { + private readonly T[] bestPosition; + private TEvalType bestResult; + + /// + /// Creates a new result manager. + /// + /// The parent optimizer. + /// + /// The best known result provided by the user. + /// + /// + /// The best known position provided by the user. + /// + public ResultManager( + MetaOptimizer optimizer, + in TEvalType bestUserKnownResult, + ReadOnlyMemory? bestKnownPosition) + { + // Validate our best known position vector. Compare against the + // optimizer's dimension count: the NumDimensions property of this + // struct is only assigned further below and would still read as 0. + if (bestKnownPosition.HasValue && + bestKnownPosition.Value.Length != optimizer.NumDimensions) + { + throw new ArgumentOutOfRangeException(nameof(bestKnownPosition)); + } + + bestPosition = new T[optimizer.NumPaddedDimensions]; + bestResult = BestInitialResult = bestUserKnownResult; + + NumDimensions = optimizer.NumDimensions; + + // Check for a valid best known result + if (!bestKnownPosition.HasValue) + { + // Reset best known position + for (int i = 0; i < bestPosition.Length; ++i) + bestPosition[i] = T.Zero; + } + else + { + // Copy known position + bestKnownPosition.Value.CopyTo(bestPosition); + + // Reset remaining parts + for (int i = NumDimensions; i < bestPosition.Length; ++i) + bestPosition[i] = T.Zero; + } + } + + /// + /// Returns the number of dimensions. + /// + public int NumDimensions { get; } + + /// + /// Returns the best found result. 
+ /// + public readonly TEvalType BestResult => bestResult; + + /// + /// Returns the best known initial result. + /// + public TEvalType BestInitialResult { get; } + + /// + /// Returns the best found position (not padded). + /// + public readonly Memory BestPosition => + new(bestPosition, 0, NumDimensions); + + /// + /// Returns the best found internal position (padded). + /// + public readonly ReadOnlyMemory BestInternalPosition => bestPosition; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Aggregate( + EvaluatorState state) + where TFunction : + IBaseOptimizationFunction, + IParallelCache + where TIntermediate : class => + state.AggregateInto(ref bestResult, bestPosition); + } + + /// + /// Represents an abstract evaluator. + /// + private interface IEvaluator : IDisposable + { + /// + /// Returns the underlying result manager. + /// + ResultManager ResultManager { get; } + + /// + /// Evaluates all players. + /// + /// The parallel processing options. + void EvaluatePlayers(ParallelOptions options); + } + + /// + /// Represents an objective function evaluator that applies the user-defined + /// function to each player position in every step. + /// + /// The objective function type. + /// + /// The intermediate state type for each evaluator thread. + /// + /// The position modifier type. + private sealed class Evaluator : + ParallelProcessingCache< + EvaluatorState, + Evaluator>, + IParallelProcessingBody>, + IEvaluator + where TFunction : ICPUOptimizationFunction + where TIntermediate : class + where TModifier : ICPUPositionModifier + { + private readonly MetaOptimizer parent; + private TFunction function; + private TModifier modifier; + + private readonly int numPaddedDimensions; + private ResultManager resultManager; + + /// + /// Creates a new evaluator. + /// + /// The parent optimizer. + /// The optimization function. + /// The position modifier. + /// + /// The best known result provided by the user. 
+ /// + /// + /// The best known position provided by the user. + /// + public Evaluator( + MetaOptimizer optimizer, + in TFunction optimizationFunction, + in TModifier positionModifier, + in TEvalType bestUserKnownResult, + ReadOnlyMemory? bestKnownPosition) + { + parent = optimizer; + function = optimizationFunction; + modifier = positionModifier; + + numPaddedDimensions = optimizer.NumPaddedDimensions; + resultManager = new(optimizer, bestUserKnownResult, bestKnownPosition); + } + + /// + /// Returns the result manager. + /// + public ResultManager ResultManager => resultManager; + + /// + /// Returns the current instance. + /// + protected override Evaluator< + TFunction, + TIntermediate, + TModifier> CreateBody() => this; + + /// + /// Creates an intermediate temporary state. + /// + protected override EvaluatorState + CreateIntermediate() => + new(function, numPaddedDimensions); + + /// + /// Resets the given intermediate state by using the best known result + /// provided by the user. + /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + protected override void InitializeIntermediate( + EvaluatorState intermediateState) + { + intermediateState.Reset(resultManager.BestInitialResult); + function.InitializeIntermediate(intermediateState.Intermediate); + } + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Initialize() { } + + /// + /// Evaluates all players and accumulates intermediate results. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Process( + int index, + ParallelLoopState? 
loopState, + EvaluatorState intermediateState) + { + // Get the source position and evaluate + var positionMemory = parent.GetPositionMemory(index); + + // Adjust position + modifier.AdjustPosition( + index, + positionMemory, + resultManager.NumDimensions, + numPaddedDimensions); + + // Convert into a span and evaluate + var position = positionMemory.Span; + var result = function.Evaluate(position, intermediateState.Intermediate); + + // Store evaluation result + parent.evaluations[index] = result; + + // Merge intermediate state + intermediateState.MergeWith(result, position); + } + + /// + /// Aggregates all temporarily found best results into a globally shared + /// state to find the best solution taking all solutions into account. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Finalize( + ReadOnlySpan> intermediateStates) + { + // Iterate over all states and aggregate all information + foreach (var state in intermediateStates) + { + function.FinishProcessing(state.Intermediate); + resultManager.Aggregate(state); + } + } + + /// + /// Evaluates all players in parallel using the underlying modifier, eval + /// function, and comparison functions. + /// + /// The parallel processing options. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void EvaluatePlayers(ParallelOptions options) => + ParallelFor(0, parent.NumPlayers, options); + } + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs new file mode 100644 index 0000000000..f83463a158 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs @@ -0,0 +1,149 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.InitializePlayers.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Algorithms.Random; +using ILGPU.Util; +using System; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// A parallel processing state for player initialization based on random number + /// generators used during placement of players. + /// + /// A random provider type. + /// A processing type. + private class InitializePlayersState + where TType : unmanaged + where TRandom : struct, IRandomRangeProvider + { + private TRandom randomProvider; + + /// + /// Creates a new initialization state. + /// + /// The random provider to use. + public InitializePlayersState(TRandom random) + { + randomProvider = random; + } + + /// + /// Draws a random number using the given CPU-based RNG provider. + /// + /// The drawn random number. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TType Next() => randomProvider.Next(); + } + + /// + /// A player position initializer. 
+ /// + /// The processor type. + /// The processor element type. + /// The random provider type. + private sealed class InitializePlayers : + ParallelProcessingCache< + InitializePlayersState, + InitializePlayers>, + IParallelProcessingBody> + where TProcessor : struct, IProcessor + where TType : unmanaged + where TRandom : struct, IRandomRangeProvider + { + private readonly MetaOptimizer parent; + private readonly Func, TRandom> getRandom; + + /// + /// Creates a new player initializer. + /// + /// The parent optimizer. + /// A function creating a new RNG instance. + public InitializePlayers( + MetaOptimizer optimizer, + Func, TRandom> createRandom) + { + parent = optimizer; + getRandom = createRandom; + } + + /// + /// Returns the current instance. + /// + protected override InitializePlayers + CreateBody() => this; + + /// + /// Creates an intermediate state which uses the parent RNG to create fresh + /// random numbers in parallel. + /// + protected override InitializePlayersState + CreateIntermediate() => new(getRandom(parent)); + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Initialize() { } + + /// + /// Initializes all position slices of the player with the given index using + /// the lower and upper bounds of the parent optimizer and random values + /// drawn from the given intermediate state. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Process( + int index, + ParallelLoopState? 
loopState, + InitializePlayersState intermediateState) + { + // Get player and the local bounds + var player = parent.GetPosition(index).CastUnsafe(); + var lower = parent.lowerBounds.AsSpan().CastUnsafe(); + var upper = parent.upperBounds.AsSpan().CastUnsafe(); + + // Initialize a new processor + var processor = TProcessor.New(); + + // Initialize all player positions + for (int i = 0; i < parent.NumDimensionSlices; ++i) + { + // Draw a new random value + var randomValue = intermediateState.Next(); + + // Initialize local position + var initialPosition = processor.GetRandomPosition( + lower.GetItemRef(i), + upper.GetItemRef(i), + randomValue); + player.GetItemRef(i) = initialPosition; + } + } + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Finalize( + ReadOnlySpan> + intermediateStates) + { } + } + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs new file mode 100644 index 0000000000..5bc677e417 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs @@ -0,0 +1,568 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.Instance.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// --------------------------------------------------------------------------------------- + +using ILGPU.Algorithms.Random; +using ILGPU.Util; +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +#pragma warning disable CA1000 // No static members on generic types + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// Holds intermediate and run-specific optimizer instances that depend on + /// objective function and random instances. + /// + /// The internal evaluator type. + /// The objective function type. + /// + /// The type of all intermediate states during processing. + /// + /// The processor type being used. + /// The processor element type. + /// The random range generator type. + sealed class RuntimeInstance< + TEvaluator, + TFunction, + TIntermediate, + TProcessor, + TType, + TRandom> : DisposeBase + where TEvaluator : class, IEvaluator + where TFunction : IBaseOptimizationFunction + where TIntermediate : class + where TProcessor : struct, IProcessor + where TType : unmanaged + where TRandom : struct, IRandomRangeProvider + { + private readonly MetaOptimizer optimizer; + private readonly TEvaluator evaluator; + private readonly UpdatePlayers< + TFunction, + TProcessor, + TType, + TRandom> updatePlayers; + + /// + /// Creates a new runtime instance. + /// + /// The parent optimizer. + /// + /// A specialized random provider generator. + /// + /// The objective function. + /// The evaluator instance. + public RuntimeInstance( + MetaOptimizer parent, + Func, TRandom> createRandom, + in TFunction function, + TEvaluator evaluatorInstance) + { + optimizer = parent; + evaluator = evaluatorInstance; + updatePlayers = new(parent, createRandom, function) + { + BestPosition = evaluator.ResultManager.BestInternalPosition + }; + } + + /// + /// Returns the best result manager. 
+ /// + public ResultManager ResultManager => evaluator.ResultManager; + + /// + /// Evaluates all player positions. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void EvaluatePlayers(ParallelOptions options) => + evaluator.EvaluatePlayers(options); + + /// + /// Updates all player positions. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UpdatePlayers(ParallelOptions options) + { + updatePlayers.ParallelFor(0, optimizer.M, options); + + // Update SOG and SDG information + updatePlayers.HasCurrentSOGAndSDG = true; + } + + /// + /// Disposes the current evaluator and the specialized update players + /// instance. + /// + protected override void Dispose(bool disposing) + { + if (disposing) + { + evaluator.Dispose(); + updatePlayers.Dispose(); + } + base.Dispose(disposing); + } + } + + /// + /// An instance implementing + /// + /// The processor type being used. + /// The processor element type. + /// + /// The random range generator type for scalar types. + /// + /// + /// The random range generator type for specialized processing types. + /// + sealed class Instance< + TProcessor, + TType, + TRandom, + TTypeRandom> : MetaOptimizer + where TProcessor : struct, IProcessor + where TType : unmanaged + where TRandom : struct, IRandomRangeProvider + where TTypeRandom : struct, IRandomRangeProvider + { + private readonly ParallelOptions parallelOptions; + private readonly OGAndDG ogAndDG; + private readonly AdjustSOGPlayers< + TProcessor, + TType, + TRandom> adjustSOGPlayers; + private readonly InitializePlayers< + TProcessor, + TType, + TTypeRandom> initializePlayers; + + private readonly Func, TRandom> getRandom; + + /// + /// Creates a new meta optimizer instance. + /// + /// The input random number generator. + /// The number of players to use. + /// The dimensionality of the problem. + /// + /// The maximum number of parallel processing threads (if any). 
+ /// + /// + /// A function callback to create random range generators for type T. + /// + /// + /// A function callback to create random range generators for type TType. + /// + public Instance( + System.Random inputRandom, + int numPlayers, + int numDimensions, + int? maxNumParallelThreads, + Func, TRandom> createRandom, + Func, TTypeRandom> createTTypeRandom) + : base( + inputRandom, + numPlayers, + numDimensions, + maxNumParallelThreads, + TProcessor.Length) + { + ogAndDG = new(this); + adjustSOGPlayers = new(this, createRandom); + initializePlayers = new(this, createTTypeRandom); + + getRandom = createRandom; + + // Create new parallel options limiting the max degree of parallelism + parallelOptions = new ParallelOptions() + { + MaxDegreeOfParallelism = MaxNumWorkers, + }; + } + + /// + /// Optimizes the given optimization function while using a specified + /// break function and initial values for the best result. + /// + /// The optimization function type. + /// + /// The intermediate optimization state type. + /// + /// The break function type. + /// The position modifier type. + /// + /// The optimization function to use. + /// + /// The break function to use. + /// + /// The position modifier to apply to all position updates during + /// optimization. + /// + /// The best known result. + /// The best known position. + /// + /// A tuple consisting of the best result and position found. + /// + public override (TEvalType Result, Memory Position) Optimize< + TFunction, + TIntermediate, + TBreakFunction, + TModifier>( + in TFunction optimizationFunction, + in TBreakFunction breakFunction, + in TModifier positionModifier, + TEvalType bestResult, + ReadOnlyMemory? 
bestKnownPosition = default) + { + // Create new evaluator based on the given optimization function + var evaluator = new Evaluator( + this, + optimizationFunction, + positionModifier, + bestResult, + bestKnownPosition); + + // Create a new runtime instance to track all instances for this run + using var runtimeInstance = new RuntimeInstance< + Evaluator, + TFunction, + TIntermediate, + TProcessor, + TType, + TRandom>( + this, + getRandom, + optimizationFunction, + evaluator); + + // Perform optimization + OptimizeInternal(breakFunction, runtimeInstance); + + // Load best result information + var resultManager = runtimeInstance.ResultManager; + return (resultManager.BestResult, resultManager.BestPosition); + } + + /// <summary> + /// Optimizes the given raw (delegate-based) optimization function while + /// using the specified break function, evaluation comparison function and + /// initial values for the best result. + /// </summary> + /// <param name="optimizationFunction"> + /// The raw optimization function to use. + /// </param> + /// <param name="breakFunction">The break function to use.</param> + /// <param name="evaluationComparison"> + /// The comparison function used to compare evaluation results. + /// </param> + /// <param name="bestResult">The best known result.</param> + /// <param name="bestKnownPosition">The best known position.</param> + /// <returns> + /// A tuple consisting of the best result and position found. + /// </returns> + public override (TEvalType Result, Memory Position) OptimizeRaw( + RawCPUOptimizationFunction optimizationFunction, + CPUOptimizationBreakFunction breakFunction, + CPUEvaluationComparison evaluationComparison, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + { + // Create new evaluator based on the given optimization function + var evaluator = new RawEvaluator( + this, + optimizationFunction, + evaluationComparison, + bestResult, + bestKnownPosition); + + // Create our raw function wrapper + var wrapper = new RawComparisonWrapper(evaluationComparison); + + // Create a new runtime instance to track all instances for this run + using var runtimeInstance = new RuntimeInstance< + RawEvaluator, + RawComparisonWrapper, + object, + TProcessor, + TType, + TRandom>( + this, + getRandom, + wrapper, + evaluator); + + // Perform optimization + var breakFunctionWrapper = new BreakFunctionWrapper(breakFunction); + OptimizeInternal(breakFunctionWrapper, runtimeInstance); + + // Load best result information + var resultManager = runtimeInstance.ResultManager; + return (resultManager.BestResult, resultManager.BestPosition); + } + + /// + /// The internal optimizer loop which uses the SGO algorithm to adjust + /// player/particle positions according 
to the objective functions and the + /// update parameters defined. + /// + /// The break function to use. + /// + /// The current runtime instance holding all temporary instances. + /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + private void OptimizeInternal< + TFunction, + TIntermediate, + TBreakFunction, + TEvaluator>( + in TBreakFunction breakFunction, + RuntimeInstance< + TEvaluator, + TFunction, + TIntermediate, + TProcessor, + TType, + TRandom> runtimeInstance) + where TEvaluator : class, IEvaluator + where TFunction : IBaseOptimizationFunction + where TIntermediate : class + where TBreakFunction : ICPUOptimizationBreakFunction + { + // Update internal references + adjustSOGPlayers.BestPosition = + runtimeInstance.ResultManager.BestInternalPosition; + + // Initialize all players + initializePlayers.ParallelFor(0, NumPlayers, parallelOptions); + + // Evaluate all players first + runtimeInstance.EvaluatePlayers(parallelOptions); + + // Enter actual optimizer loop + for (int iteration = 0; ; ++iteration) + { + // Permute all indices in the beginning + Permute(); + + // Copy positions to new versions + CopyPositions(); + + // Initialize all SOG information + InitSOGList(); + + // Compute OG and DG information + ogAndDG.ParallelFor(0, M, parallelOptions); + + // Update all players + runtimeInstance.UpdatePlayers(parallelOptions); + + // Update SOG adjustments + if (iteration > 0) + adjustSOGPlayers.ParallelFor(0, sogListCounter, parallelOptions); + + // Finally, swap all buffers + SwapBuffers(); + + // Evaluate all players + runtimeInstance.EvaluatePlayers(parallelOptions); + + // Check for user-defined break predicates + if (breakFunction.Break( + runtimeInstance.ResultManager.BestResult, + iteration)) + { + break; + } + } + } + + #region IDisposable + + /// + /// Disposes internal parallel cache instances. 
+ /// + protected override void Dispose(bool disposing) + { + if (disposing) + { + ogAndDG.Dispose(); + adjustSOGPlayers.Dispose(); + initializePlayers.Dispose(); + } + + base.Dispose(disposing); + } + + #endregion + } + + /// + /// Creates a new meta optimizer using non-vectorized scalar operations. + /// + /// The random range provider type to use. + /// The input random number generator. + /// + /// The number of players to use (must be at least two and an even number). + /// + /// + /// The number of dimensions (must be greater than one). + /// + /// + /// The maximum number of parallel threads (if any). Not providing a specific + /// number of threads means using as many threads as possible. + /// + /// The created meta optimizer instance. + [CLSCompliant(false)] + public static MetaOptimizer CreateScalar( + System.Random inputRandom, + int numPlayers, + int numDimensions, + int? maxNumParallelThreads = null) + where TRandom : struct, IRandomRangeProvider + { + // Creates new random range generators using the scalar type T + TRandom CreateRandom(MetaOptimizer parent) => + TRandom.Create(parent.random, T.Zero, T.One); + + return new Instance( + inputRandom, + numPlayers, + numDimensions, + maxNumParallelThreads, + CreateRandom, + CreateRandom); + } + + /// + /// Creates a new meta optimizer using vectorized operations. + /// + /// The random range provider type to use. + /// The input random number generator. + /// + /// The number of players to use (must be at least two and an even number). + /// + /// + /// The number of dimensions (must be greater than one). + /// + /// + /// The maximum number of parallel threads (if any). Not providing a specific + /// number of threads means using as many threads as possible. + /// + /// The created meta optimizer instance. + [CLSCompliant(false)] + public static MetaOptimizer CreateVectorized( + System.Random inputRandom, + int numPlayers, + int numDimensions, + int? 
maxNumParallelThreads = null) + where TRandom : struct, IRandomRangeProvider + { + // Creates new random range generators using the scalar type T + TRandom CreateRandom(MetaOptimizer parent) => + TRandom.Create(parent.random, T.Zero, T.One); + + // Creates new random range generators using the vectorized type TType + RandomRangeVectorProvider CreateVectorizedRandom( + MetaOptimizer parent) => + CreateRandom(parent).CreateVectorProvider(); + + return new Instance< + VectorizedProcessor, + Vector, + TRandom, + RandomRangeVectorProvider>( + inputRandom, + numPlayers, + numDimensions, + maxNumParallelThreads, + CreateRandom, + CreateVectorizedRandom); + } + } + + /// + /// A static helper class for instances. + /// + public static class MetaOptimizer + { + #region Static + + /// + /// Creates a new meta optimizer using non-vectorized scalar operations. + /// + /// + /// The main element type for all position vectors. + /// + /// The evaluation data type. + /// The random range provider type to use. + /// The input random number generator. + /// + /// The number of players to use (must be at least two and an even number). + /// + /// + /// The number of dimensions (must be greater than one). + /// + /// + /// The maximum number of parallel threads (if any). Not providing a specific + /// number of threads means using as many threads as possible. + /// + /// The created meta optimizer instance. + [CLSCompliant(false)] + public static MetaOptimizer CreateScalar( + System.Random inputRandom, + int numPlayers, + int numDimensions, + int? maxNumParallelThreads = null) + where T : unmanaged, INumber + where TEvalType : struct, IEquatable + where TRandom : struct, IRandomRangeProvider => + MetaOptimizer.CreateScalar( + inputRandom, + numPlayers, + numDimensions, + maxNumParallelThreads); + + /// + /// Creates a new meta optimizer using vectorized operations. + /// + /// + /// The main element type for all position vectors. + /// + /// The evaluation data type. 
+ /// The random range provider type to use. + /// The input random number generator. + /// + /// The number of players to use (must be at least two and an even number). + /// + /// + /// The number of dimensions (must be greater than one). + /// + /// + /// The maximum number of parallel threads (if any). Not providing a specific + /// number of threads means using as many threads as possible. + /// + /// The created meta optimizer instance. + [CLSCompliant(false)] + public static MetaOptimizer CreateVectorized< + T, + TEvalType, + TRandom>( + System.Random inputRandom, + int numPlayers, + int numDimensions, + int? maxNumParallelThreads = null) + where T : unmanaged, INumber + where TEvalType : struct, IEquatable + where TRandom : struct, IRandomRangeProvider => + MetaOptimizer.CreateVectorized( + inputRandom, + numPlayers, + numDimensions, + maxNumParallelThreads); + + #endregion + } +} + +#pragma warning restore CA1000 + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs new file mode 100644 index 0000000000..013a1b32ad --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs @@ -0,0 +1,192 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.OGAndDG.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Util; +using System; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// Represents an intermediate parallel processing state for OG and DG state. 
+ /// + private sealed class OGAndDGState + { + private readonly T[] nextOG; + private readonly T[] nextDG; + + /// + /// Creates a new intermediate state. + /// + /// The number of dimensions. + public OGAndDGState(int numDimensions) + { + nextOG = new T[numDimensions]; + nextDG = new T[numDimensions]; + } + + /// + /// Returns a span of the given processing type pointing to the next OG. + /// + /// The processing type. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetNextOG() where TType : struct => + nextOG.AsSpan().CastUnsafe(); + + /// + /// Returns a span of the given processing type pointing to the next DG. + /// + /// The processing type. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetNextDG() where TType : struct => + nextDG.AsSpan().CastUnsafe(); + } + + /// + /// Computes OG and DG information. + /// + /// The processor type. + /// The processor element type. + private sealed class OGAndDG : + ParallelProcessingCache>, + IParallelProcessingBody + where TProcessor : struct, IProcessor + where TType : unmanaged + { + private readonly MetaOptimizer parent; + private readonly T convertedM; + + /// + /// Creates a new OG and DG computer. + /// + /// The parent optimizer. + public OGAndDG(MetaOptimizer optimizer) + { + parent = optimizer; + convertedM = T.CreateTruncating(optimizer.M); + } + + /// + /// Returns the current instance. + /// + protected override OGAndDG CreateBody() => this; + + /// + /// Creates an intermediate temporary accumulation array of two times the + /// dimension size. + /// + protected override OGAndDGState CreateIntermediate() => + new(parent.NumPaddedDimensions); + + /// + /// Resets the given intermediate state by resetting all values to T.Zero. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + protected override void InitializeIntermediate( + OGAndDGState intermediateState) + { + var nextOG = intermediateState.GetNextOG(); + var nextDG = intermediateState.GetNextDG(); + + parent.Reset(nextOG, nextDG); + } + + /// + /// Resets parent OG and DG vectors for accumulation purposes. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Initialize() + { + // Reset OG and DG vectors + var og = parent.og.AsSpan().CastUnsafe(); + var dg = parent.dg.AsSpan().CastUnsafe(); + + parent.Reset(og, dg); + } + + /// + /// Accumulates offensive and defensive players into OG and DG vectors. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Process( + int index, + ParallelLoopState? loopState, + OGAndDGState intermediateState) + { + // Get offsets and spans for offensive and defensive players + var indices = parent.indices.AsSpan(); + int offensiveIndex = indices.GetItemRef(index); + int defensiveIndex = indices.GetItemRef(index + parent.M); + + // Get the actual source views + var offensive = parent + .GetPosition(offensiveIndex) + .CastUnsafe(); + var defensive = parent + .GetPosition(defensiveIndex) + .CastUnsafe(); + + // Get the actual target views + var og = intermediateState.GetNextOG(); + var dg = intermediateState.GetNextDG(); + + // Accumulate all intermediates + parent.Accumulate( + og, + dg, + offensive, + defensive); + } + + /// + /// Accumulates all intermediate OG and DG states while averaging the result. 
+ /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Finalize(ReadOnlySpan intermediateStates) + { + var og = parent.og.AsSpan().CastUnsafe(); + var dg = parent.dg.AsSpan().CastUnsafe(); + + // Iterate over all dimensions and states accumulate results + foreach (var state in intermediateStates) + { + var sourceOG = state.GetNextOG(); + var sourceDG = state.GetNextDG(); + + parent.Accumulate( + og, + dg, + sourceOG, + sourceDG); + } + + // Compute averages over all dimension slices + parent.ComputeAverage( + og, + dg, + convertedM); + } + } + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs new file mode 100644 index 0000000000..57f08e2545 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs @@ -0,0 +1,205 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.RawEvaluator.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Util; +using System; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +#if NET7_0_OR_GREATER + +namespace ILGPU.Algorithms.Optimization.CPU +{ + partial class MetaOptimizer + { + /// + /// Represents a comparison interface wrapper around a delegate comparison + /// function used to compare evaluation results. + /// + /// The evaluation delegate. 
+ private readonly record struct RawComparisonWrapper( + CPUEvaluationComparison EvaluationComparison) : + IBaseOptimizationFunction, + IParallelCache + { + /// + /// Represents a shared intermediate state holding a valid object instance. + /// + public static readonly object SharedIntermediateState = new(); + + /// + /// Invokes the underlying comparison delegate to compare current and proposed + /// evaluation instances. + /// + public bool CurrentIsBetter(TEvalType current, TEvalType proposed) => + EvaluationComparison(current, proposed); + + /// + /// Returns the shared intermediate state object. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public object CreateIntermediate() => SharedIntermediateState; + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void InitializeIntermediate(object intermediateState) { } + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FinishProcessing(object intermediateState) { } + } + + /// + /// Represents an objective function evaluator that passes all player positions + /// to the user-defined raw function in every step and reduces the resulting + /// evaluations in parallel afterwards. + /// + private sealed class RawEvaluator : + ParallelProcessingCache< + EvaluatorState, + RawEvaluator>, + IParallelProcessingBody>, + IEvaluator + { + private readonly MetaOptimizer parent; + private readonly RawCPUOptimizationFunction function; + private readonly CPUEvaluationComparison comparison; + + private readonly int numPaddedDimensions; + private ResultManager resultManager; + + /// + /// Creates a new evaluator. + /// + /// The parent optimizer. + /// The optimization function. + /// The evaluation comparison function. + /// + /// The best known result provided by the user. + /// + /// + /// The best known position provided by the user. 
+ /// + public RawEvaluator( + MetaOptimizer optimizer, + RawCPUOptimizationFunction optimizationFunction, + CPUEvaluationComparison evaluationComparison, + in TEvalType bestUserKnownResult, + ReadOnlyMemory? bestKnownPosition) + { + parent = optimizer; + function = optimizationFunction; + comparison = evaluationComparison; + + numPaddedDimensions = optimizer.NumPaddedDimensions; + resultManager = new(optimizer, bestUserKnownResult, bestKnownPosition); + } + + /// + /// Returns the result manager. + /// + public ResultManager ResultManager => resultManager; + + /// + /// Returns the current instance. + /// + protected override RawEvaluator CreateBody() => this; + + /// + /// Creates an intermediate temporary state. + /// + protected override EvaluatorState< + RawComparisonWrapper, + object> CreateIntermediate() => + new(new(comparison), numPaddedDimensions); + + /// + /// Resets the given intermediate state by using the best known result + /// provided by the user. + /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + protected override void InitializeIntermediate( + EvaluatorState intermediateState) => + intermediateState.Reset(resultManager.BestInitialResult); + + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Initialize() { } + + /// + /// Evaluates all players and accumulates intermediate results. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Process( + int index, + ParallelLoopState? 
loopState, + EvaluatorState intermediateState) + { + // Get the source position + var position = parent.GetPosition(index); + + // Get the evaluation result + var result = parent.evaluations[index]; + + // Merge intermediate state + intermediateState.MergeWith(result, position); + } + + /// + /// Aggregates all temporarily found best results into a globally shared + /// state to find the best solution taking all solutions into account. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + public void Finalize( + ReadOnlySpan< + EvaluatorState> intermediateStates) + { + // Iterate over all states and aggregate all information + foreach (var state in intermediateStates) + resultManager.Aggregate(state); + } + + /// + /// Evaluates all players using the given raw evaluation function first. + /// After having evaluated all particle positions, it reduces all results + /// in parallel. + /// + /// The parallel processing options. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void EvaluatePlayers(ParallelOptions options) + { + // Evaluate all players using the provided raw function + function( + parent.positions.AsMemory(), + parent.evaluations.AsMemory(), + ResultManager.NumDimensions, + parent.NumPaddedDimensions, + parent.NumPlayers, + new(parent.NumPaddedDimensions), + options); + + // Reduce all results in parallel + ParallelFor(0, parent.NumPlayers, options); + } + } + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs new file mode 100644 index 0000000000..ae14ac88a7 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs @@ -0,0 +1,100 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: MetaOptimizer.Scalar.cs +// +// This file is part 
of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System.Runtime.CompilerServices;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer
+    {
+        /// <summary>
+        /// A scalar processor using default ALUs.
+        /// </summary>
+        private readonly struct ScalarProcessor : IProcessor
+        {
+            /// <summary>
+            /// Creates a new scalar processor.
+            /// </summary>
+            public static ScalarProcessor New() => default;
+
+            /// <summary>
+            /// Returns 1, as a scalar processor handles a single element per step.
+            /// </summary>
+            public static int Length => 1;
+
+            /// <summary>
+            /// Clamps the given value.
+            /// </summary>
+            /// <param name="lower">The lower bounds part.</param>
+            /// <param name="upper">The upper bounds part.</param>
+            /// <param name="value">The value to clamp.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T Clamp(T lower, T upper, T value) =>
+                T.Clamp(value, lower, upper);
+
+            /// <summary>
+            /// Resets the given data view.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Reset(out T data) => data = T.Zero;
+
+            /// <summary>
+            /// Adds the given source to the target view.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Accumulate(ref T target, T source) =>
+                target += source;
+
+            /// <summary>
+            /// Computes the average by taking the given count into account.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void ComputeAverage(ref T target, T count) =>
+                target /= count;
+
+            /// <summary>
+            /// Determines a newly sampled random position between the given bounds
+            /// using scalars.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T GetRandomPosition(T lower, T upper, T randomNumber)
+            {
+                // Interpolate between lower and upper bound
+                var lowerInfluence = (T.One - randomNumber) * lower;
+                var upperInfluence = randomNumber * upper;
+                return lowerInfluence + upperInfluence;
+            }
+
+            /// <summary>
+            /// Determines a newly sampled position using scalars.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T DetermineNewPosition(
+                T position,
+                T firstC,
+                T secondC,
+                T r1,
+                T r2,
+                T stepSize)
+            {
+                // Determine new offset to use
+                var newOffset = r1 * firstC - r2 * secondC;
+
+                // Compute final position
+                var finalPos = position + newOffset * stepSize;
+                return finalPos;
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs
new file mode 100644
index 0000000000..9f1d0a314d
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs
@@ -0,0 +1,414 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU Algorithms
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: MetaOptimizer.UpdatePlayers.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Random;
+using ILGPU.Util;
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer
+    {
+        /// <summary>
+        /// Represents an intermediate parallel processing state for updating players.
+        /// </summary>
+        /// <typeparam name="TRandom">The random provider type.</typeparam>
+        private sealed class UpdatePlayersState : AdjustSOGPlayersState
+            where TRandom : struct, IRandomRangeProvider
+        {
+            private readonly T[] nextSOG;
+            private readonly T[] nextSDG;
+
+            private int nextSOGCounter;
+            private int nextSDGCounter;
+
+            /// <summary>
+            /// Creates new intermediate state.
+            /// </summary>
+            /// <param name="provider">The random provider instance.</param>
+            /// <param name="numDimensions">The number of dimensions.</param>
+            public UpdatePlayersState(TRandom provider, int numDimensions)
+                : base(provider)
+            {
+                nextSOG = new T[numDimensions];
+                nextSDG = new T[numDimensions];
+            }
+
+            /// <summary>
+            /// Resets all internally stored counters.
+            /// </summary>
+            public void ResetCounters()
+            {
+                nextSOGCounter = 0;
+                nextSDGCounter = 0;
+            }
+
+            /// <summary>
+            /// Adds a new SOG member.
+            /// </summary>
+            public void AddSOGMember() => ++nextSOGCounter;
+
+            /// <summary>
+            /// Adds a new SDG member.
+            /// </summary>
+            public void AddSDGMember() => ++nextSDGCounter;
+
+            /// <summary>
+            /// Returns a span of the given processing type pointing to the next SOG.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span GetNextSOG() where TType : struct =>
+                nextSOG.AsSpan().CastUnsafe();
+
+            /// <summary>
+            /// Returns a span of the given processing type pointing to the next SDG.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span GetNextSDG() where TType : struct =>
+                nextSDG.AsSpan().CastUnsafe();
+
+            /// <summary>
+            /// Adds the SOG and SDG member counters of this state to the externally
+            /// provided counters.
+            /// </summary>
+            public void AccumulateCounters(ref int sogMembers, ref int sdgMembers)
+            {
+                sogMembers += nextSOGCounter;
+                sdgMembers += nextSDGCounter;
+            }
+        }
+
+        /// <summary>
+        /// Updates all players according to defensive and offensive winners.
+        /// </summary>
+        /// <typeparam name="TFunction">The objective function type to use.</typeparam>
+        /// <typeparam name="TProcessor">The processor type being used.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        /// <typeparam name="TRandom">The random provider type.</typeparam>
+        private sealed class UpdatePlayers<
+            TFunction,
+            TProcessor,
+            TType,
+            TRandom> :
+            ParallelProcessingCache<
+                UpdatePlayersState,
+                UpdatePlayers<
+                    TFunction,
+                    TProcessor,
+                    TType,
+                    TRandom>>,
+            IParallelProcessingBody>
+            where TFunction : IBaseOptimizationFunction
+            where TProcessor : struct, IProcessor
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider
+        {
+            private readonly MetaOptimizer parent;
+            private readonly Func, TRandom> getRandom;
+            private readonly TFunction function;
+
+            private volatile bool hasSOGAndSDG;
+
+            /// <summary>
+            /// Creates a new player update instance.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer instance.</param>
+            /// <param name="createRandom">A function creating a new RNG instance.</param>
+            /// <param name="optimizationFunction">The objective function.</param>
+            public UpdatePlayers(
+                MetaOptimizer optimizer,
+                Func, TRandom> createRandom,
+                in TFunction optimizationFunction)
+            {
+                parent = optimizer;
+                getRandom = createRandom;
+                function = optimizationFunction;
+
+                NumDimensionSlices = optimizer.NumDimensionSlices;
+            }
+
+            /// <summary>
+            /// Returns the current instance.
+            /// </summary>
+            protected override UpdatePlayers<
+                TFunction,
+                TProcessor,
+                TType,
+                TRandom> CreateBody() => this;
+
+            /// <summary>
+            /// Returns the number of dimension slices to iterate over per player
+            /// (copied from the parent optimizer).
+            /// </summary>
+            public int NumDimensionSlices { get; }
+
+            /// <summary>
+            /// Gets or sets whether SOG and SDG information is currently available.
+            /// If so, offensive players additionally apply the SOG adjustment.
+            /// </summary>
+            public bool HasCurrentSOGAndSDG
+            {
+                get => hasSOGAndSDG;
+                set => hasSOGAndSDG = value;
+            }
+
+            /// <summary>
+            /// Gets or sets the best known position vector.
+            /// </summary>
+            public ReadOnlyMemory BestPosition { get; set; }
+
+            /// <summary>
+            /// Creates an intermediate temporary state.
+            /// </summary>
+            protected override UpdatePlayersState CreateIntermediate() =>
+                new(getRandom(parent), parent.NumPaddedDimensions);
+
+            /// <summary>
+            /// Resets the given intermediate state by resetting all values to T.Zero.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            protected override void InitializeIntermediate(
+                UpdatePlayersState intermediateState)
+            {
+                // Reset next SOG and SDG vectors
+                var nextSOG = intermediateState.GetNextSOG();
+                var nextSDG = intermediateState.GetNextSDG();
+
+                parent.Reset(nextSOG, nextSDG);
+
+                // Reset SOG and SDG counters
+                intermediateState.ResetCounters();
+            }
+
+            /// <summary>
+            /// Resets the next SOG and SDG vectors.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Initialize()
+            {
+                // Reset parent next SOG and SDG vectors
+                var nextSOG = parent.nextSOG.AsSpan().CastUnsafe();
+                var nextSDG = parent.nextSDG.AsSpan().CastUnsafe();
+
+                parent.Reset(nextSOG, nextSDG);
+            }
+
+            /// <summary>
+            /// Lets the offensive and the defensive player of the given pair compete.
+            /// The loser receives a new clamped position in the next-position buffer,
+            /// while the winner is accumulated into the next SOG (offensive winner) or
+            /// next SDG (defensive winner) vector of the given state.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? _,
+                UpdatePlayersState state)
+            {
+                // Get offsets and spans for offensive and defensive players
+                var indices = parent.indices.AsSpan();
+                int offensiveIndex = indices.GetItemRef(index);
+                int defensiveIndex = indices.GetItemRef(index + parent.M);
+
+                // Get the actual source views
+                var offensive = parent.GetPosition(offensiveIndex);
+                var defensive = parent.GetPosition(defensiveIndex);
+
+                // Look up the stored evaluations of both positions and test whether
+                // the offensive or the defensive player wins this competition
+                var evaluatedOffensive = parent.evaluations[offensiveIndex];
+                var evaluatedDefensive = parent.evaluations[defensiveIndex];
+                bool offensiveWins = function.CurrentIsBetter(
+                    evaluatedOffensive,
+                    evaluatedDefensive);
+
+                // Get lower and upper bounds
+                var lowerBounds = parent.lowerBounds.AsSpan().CastUnsafe();
+                var upperBounds = parent.upperBounds.AsSpan().CastUnsafe();
+
+                // Get the current players
+                var currentOffensive = offensive.CastUnsafe();
+                var currentDefensive = defensive.CastUnsafe();
+
+                // Create new processor for this iteration
+                var processor = TProcessor.New();
+                if (offensiveWins)
+                {
+                    // Get two random numbers
+                    var r1 = state.Next();
+                    var r2 = state.Next();
+
+                    // Get OG vector
+                    var og = parent.og.AsSpan().CastUnsafe();
+
+                    // Get a random offensive player
+                    int randomOffensiveIndex = parent.GetRandomOffensiveIndex(index);
+                    var randomOffensive = parent
+                        .GetPosition(randomOffensiveIndex)
+                        .CastUnsafe();
+
+                    // Fetch next vector references
+                    var nextSOG = state.GetNextSOG();
+                    var nextDefensive = parent
+                        .GetNextPosition(defensiveIndex)
+                        .CastUnsafe();
+                    for (int i = 0; i < NumDimensionSlices; ++i)
+                    {
+                        // Compute new position and set new vector of defensive player
+                        var xDefNew1 = processor.DetermineNewPosition(
+                            currentDefensive.GetItemRef(i),
+                            og.GetItemRef(i),
+                            randomOffensive.GetItemRef(i),
+                            r1,
+                            r2,
+                            parent.DefensiveStepSize);
+
+                        // Clamp new defensive position and store result
+                        var clamped = processor.Clamp(
+                            lowerBounds.GetItemRef(i),
+                            upperBounds.GetItemRef(i),
+                            xDefNew1);
+                        nextDefensive.GetItemRef(i) = clamped;
+
+                        // Accumulate SOG result
+                        processor.Accumulate(
+                            ref nextSOG.GetItemRef(i),
+                            currentOffensive.GetItemRef(i));
+                    }
+
+                    // Add new SOG member to state
+                    state.AddSOGMember();
+
+                    // Add offensive player to next sog
+                    // NOTE(review): Interlocked.Add returns the incremented value, so
+                    // the first slot written is the reset value + 1 - confirm that
+                    // sogListCounter is reset accordingly elsewhere.
+                    int sogIndex = Interlocked.Add(ref parent.sogListCounter, 1);
+                    parent.sogList[sogIndex] = offensiveIndex;
+                }
+                else
+                {
+                    // Get four random numbers
+                    var r1 = state.Next();
+                    var r2 = state.Next();
+                    var r3 = state.Next();
+                    var r4 = state.Next();
+
+                    // Get DG vector
+                    var dg = parent.dg.AsSpan().CastUnsafe();
+
+                    // Get random defensive player
+                    int randomDefensiveIndex = parent.GetRandomDefensiveIndex(index);
+                    var randomDefensive = parent
+                        .GetPosition(randomDefensiveIndex)
+                        .CastUnsafe();
+
+                    // Get SOG and best position data
+                    var sog = parent.sog.AsSpan().CastUnsafe();
+                    var bestPosition = BestPosition.Span.CastUnsafe();
+
+                    // Fetch next vector references
+                    var nextSDG = state.GetNextSDG();
+                    var nextOffensive = parent
+                        .GetNextPosition(offensiveIndex)
+                        .CastUnsafe();
+                    for (int i = 0; i < NumDimensionSlices; ++i)
+                    {
+                        // Compute new position and set new vector of offensive player
+                        var xOffNew1 = processor.DetermineNewPosition(
+                            currentOffensive.GetItemRef(i),
+                            dg.GetItemRef(i),
+                            randomDefensive.GetItemRef(i),
+                            r1,
+                            r2,
+                            parent.OffensiveStepSize);
+
+                        // Check whether we can apply SOG adjustments
+                        var xOffNew2 = xOffNew1;
+                        if (HasCurrentSOGAndSDG)
+                        {
+                            xOffNew2 = processor.DetermineNewPosition(
+                                xOffNew1,
+                                sog.GetItemRef(i),
+                                bestPosition.GetItemRef(i),
+                                r3,
+                                r4,
+                                parent.OffensiveSOGStepSize);
+                        }
+
+                        // Clamp new offensive position and store result
+                        var clamped = processor.Clamp(
+                            lowerBounds.GetItemRef(i),
+                            upperBounds.GetItemRef(i),
+                            xOffNew2);
+                        nextOffensive.GetItemRef(i) = clamped;
+
+                        // Accumulate SDG result
+                        processor.Accumulate(
+                            ref nextSDG.GetItemRef(i),
+                            currentDefensive.GetItemRef(i));
+                    }
+
+                    // Add new SDG member to state
+                    state.AddSDGMember();
+                }
+            }
+
+            /// <summary>
+            /// Accumulates next SOG and SDG values based on all previous intermediate
+            /// update states.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Finalize(
+                ReadOnlySpan> intermediateStates)
+            {
+                var sog = parent.nextSOG.AsSpan().CastUnsafe();
+                var sdg = parent.nextSDG.AsSpan().CastUnsafe();
+
+                // Store total counters
+                int sogMembers = 0;
+                int sdgMembers = 0;
+
+                // Iterate over all states and accumulate their results
+                foreach (var state in intermediateStates)
+                {
+                    var sourceSOG = state.GetNextSOG();
+                    var sourceSDG = state.GetNextSDG();
+
+                    parent.Accumulate(
+                        sog,
+                        sdg,
+                        sourceSOG,
+                        sourceSDG);
+
+                    state.AccumulateCounters(ref sogMembers, ref sdgMembers);
+                }
+
+                // Ensure that we have not lost a single particle
+                Debug.Assert(sogMembers + sdgMembers == parent.M);
+
+                // Compute averages over all dimension slices
+                parent.ComputeAverage(
+                    sog,
+                    sdg,
+                    T.CreateSaturating(sogMembers),
+                    T.CreateSaturating(sdgMembers));
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs
new file mode 100644
index 0000000000..ce45e153a0
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs
@@ -0,0 +1,117 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU Algorithms
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: MetaOptimizer.Vectorized.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer
+    {
+        /// <summary>
+        /// A vectorized processor using SIMD operations.
+        /// </summary>
+        private readonly struct VectorizedProcessor :
+            IProcessor>
+        {
+            /// <summary>
+            /// Creates a new vectorized processor.
+            /// </summary>
+            public static VectorizedProcessor New() => default;
+
+            /// <summary>
+            /// Returns the vector length.
+            /// </summary>
+            public static int Length => Vector.Count;
+
+            /// <summary>
+            /// Clamps the given vector.
+            /// </summary>
+            /// <param name="lower">The lower bounds part.</param>
+            /// <param name="upper">The upper bounds part.</param>
+            /// <param name="value">The vector to clamp.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector Clamp(
+                Vector lower,
+                Vector upper,
+                Vector value) =>
+                Vector.Min(Vector.Max(value, lower), upper);
+
+            /// <summary>
+            /// Resets the given data view.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Reset(out Vector data) =>
+                data = new Vector(T.Zero);
+
+            /// <summary>
+            /// Adds the given source to the target view.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Accumulate(ref Vector target, Vector source)
+            {
+                var accumulated = source + target;
+                target = accumulated;
+            }
+
+            /// <summary>
+            /// Computes the average by taking the given count into account.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void ComputeAverage(ref Vector target, T count)
+            {
+                var countValue = new Vector(count);
+                var average = target / countValue;
+                target = average;
+            }
+
+            /// <summary>
+            /// Determines a newly sampled random position between the given bounds
+            /// using vectors.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector GetRandomPosition(
+                Vector lower,
+                Vector upper,
+                Vector randomNumber)
+            {
+                // Interpolate between lower and upper bound
+                var lowerFactor = new Vector(T.One) - randomNumber;
+                var lowerInfluence = lowerFactor * lower;
+                var upperInfluence = randomNumber * upper;
+                return lowerInfluence + upperInfluence;
+            }
+
+            /// <summary>
+            /// Determines a newly sampled position using vectors.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector DetermineNewPosition(
+                Vector position,
+                Vector firstC,
+                Vector secondC,
+                T r1,
+                T r2,
+                T stepSize)
+            {
+                // Determine new offset to use
+                var newOffset = r1 * firstC - r2 * secondC;
+
+                // Compute final position
+                var finalPos = position + newOffset * stepSize;
+                return finalPos;
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs
new file mode 100644
index 0000000000..fee36b5cba
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs
@@ -0,0 +1,874 @@
+// ---------------------------------------------------------------------------------------
+// ILGPU Algorithms
+// Copyright (c) 2023 ILGPU Project
+// www.ilgpu.net
+//
+// File: MetaOptimizer.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    ///
+    /// This meta optimizer is designed for CPUs and uses special .NET features for
+    /// improved performance.
It implements an optimization-performance and runtime-
+    /// performance optimized version of the SGO algorithm:
+    /// Squid Game Optimizer (SGO): a novel metaheuristic algorithm
+    /// doi: 10.1038/s41598-023-32465-z.
+    ///
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    /// <remarks>
+    /// This version *does not* implement the vanilla SGO algorithm from the paper.
+    /// Instead, it uses modified update functions and specially tweaked position update
+    /// logic using multiple buffers and tuned SGO-winner lists. These modifications of
+    /// the original algorithm make this implementation significantly better in terms of
+    /// optimization quality and runtime performance. Moreover, this version is fully
+    /// parallelized and has the ability to use SIMD vector instructions to improve
+    /// runtime performance.
+    /// </remarks>
+    public abstract partial class MetaOptimizer : DisposeBase
+        where T : unmanaged, INumber
+        where TEvalType : struct, IEquatable
+    {
+        #region Nested Types
+
+        /// <summary>
+        /// A scalar or vectorized processor implementing the actual SGO equations.
+        /// </summary>
+        /// <typeparam name="TSelf">The implementing processor type.</typeparam>
+        /// <typeparam name="TType">The operating element type.</typeparam>
+        private interface IProcessor
+            where TSelf : struct, IProcessor
+            where TType : unmanaged
+        {
+            /// <summary>
+            /// Creates a new processor instance.
+            /// </summary>
+            static abstract TSelf New();
+
+            /// <summary>
+            /// Returns the number of elements processed in a single step.
+            /// </summary>
+            static abstract int Length { get; }
+
+            /// <summary>
+            /// Resets the given data view.
+            /// </summary>
+            void Reset(out TType data);
+
+            /// <summary>
+            /// Adds the given source to the target view.
+            /// </summary>
+            /// <param name="target">The target span to accumulate into.</param>
+            /// <param name="source">The source span.</param>
+            void Accumulate(ref TType target, TType source);
+
+            /// <summary>
+            /// Clamps the given value.
+            /// </summary>
+            /// <param name="lower">The lower bounds part.</param>
+            /// <param name="upper">The upper bounds part.</param>
+            /// <param name="value">The value to clamp.</param>
+            TType Clamp(TType lower, TType upper, TType value);
+
+            /// <summary>
+            /// Computes the average by taking the given count into account.
+            /// </summary>
+            /// <param name="target">The target span to read from and write to.</param>
+            /// <param name="count">The number of points to consider.</param>
+            void ComputeAverage(ref TType target, T count);
+
+            /// <summary>
+            /// Determines a newly sampled random position within the bounds of lower
+            /// and upper values.
+            /// </summary>
+            /// <param name="lower">The lower bounds of the position vector.</param>
+            /// <param name="upper">The upper bounds of the position vector.</param>
+            /// <param name="randomNumber">The random number to use.</param>
+            /// <returns>The newly sampled position.</returns>
+            TType GetRandomPosition(
+                TType lower,
+                TType upper,
+                TType randomNumber);
+
+            /// <summary>
+            /// Determines a newly sampled position.
+            /// </summary>
+            /// <param name="position">The source position.</param>
+            /// <param name="firstC">The first centroid position.</param>
+            /// <param name="secondC">The second centroid position.</param>
+            /// <param name="r1">
+            /// The factor describing the influence of <paramref name="firstC"/>.
+            /// </param>
+            /// <param name="r2">
+            /// The factor describing the influence of <paramref name="secondC"/>.
+            /// </param>
+            /// <param name="stepSize">
+            /// The step size to use for offset computations.
+            /// </param>
+            /// <returns>The newly determined position.</returns>
+            TType DetermineNewPosition(
+                TType position,
+                TType firstC,
+                TType secondC,
+                T r1,
+                T r2,
+                T stepSize);
+        }
+
+        /// <summary>
+        /// A specialized function wrapper implementing the required CPUOptimization
+        /// interfaces to call delegate functions instead of having inline function
+        /// specifications.
+        /// </summary>
+        /// <param name="EvalFunction">The evaluation function to be used.</param>
+        /// <param name="EvaluationComparison">
+        /// The function determining whether the first or the second evaluation value
+        /// given is considered better for the optimization problem.
+        /// </param>
+        /// <param name="BreakFunction">
+        /// The break function to determine whether to break the solver iteration or not.
+        /// </param>
+        private readonly record struct FunctionWrapper(
+            CPUOptimizationFunction EvalFunction,
+            CPUOptimizationBreakFunction BreakFunction,
+            CPUEvaluationComparison EvaluationComparison) :
+            ICPUOptimizationFunction,
+            ICPUOptimizationBreakFunction
+        {
+            /// <summary>
+            /// Immediately calls the given evaluation function.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public TEvalType Evaluate(ReadOnlySpan position) =>
+                EvalFunction(position);
+
+            /// <summary>
+            /// Immediately calls the given break function.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool Break(TEvalType evalType, int iteration) =>
+                BreakFunction(evalType, iteration);
+
+            /// <summary>
+            /// Immediately calls the given result comparison function.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool CurrentIsBetter(TEvalType current, TEvalType proposed) =>
+                EvaluationComparison(current, proposed);
+        }
+
+        /// <summary>
+        /// A specialized function wrapper implementing the required CPUOptimization
+        /// interfaces to test whether to break an optimization loop or not.
+        /// </summary>
+        /// <param name="BreakFunction">
+        /// The break function to determine whether to break the solver iteration or not.
+        /// </param>
+        private readonly record struct BreakFunctionWrapper(
+            CPUOptimizationBreakFunction BreakFunction) :
+            ICPUOptimizationBreakFunction
+        {
+            /// <summary>
+            /// Immediately calls the given break function.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool Break(TEvalType evalType, int iteration) =>
+                BreakFunction(evalType, iteration);
+        }
+
+        /// <summary>
+        /// Wraps a non-intermediate-state-based optimization function.
+        /// </summary>
+        /// <typeparam name="TFunction">The stateless function to wrap.</typeparam>
+        private struct CachedOptimizationFunction :
+            ICPUOptimizationFunction
+            where TFunction : ICPUOptimizationFunction
+        {
+            private TFunction function;
+
+            public CachedOptimizationFunction(TFunction optimizationFunction)
+            {
+                function = optimizationFunction;
+            }
+
+            /// <summary>
+            /// Returns a shared intermediate state object.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public object CreateIntermediate() =>
+                RawComparisonWrapper.SharedIntermediateState;
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void InitializeIntermediate(object intermediateState) { }
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void FinishProcessing(object intermediateState) { }
+
+            /// <summary>
+            /// Invokes the underlying comparison function to compare current and proposed
+            /// evaluation instances.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool CurrentIsBetter(TEvalType current, TEvalType proposed) =>
+                function.CurrentIsBetter(current, proposed);
+
+            /// <summary>
+            /// Evaluates the given position while discarding the given intermediate
+            /// state.
+            /// </summary>
+            /// <returns>The evaluation result.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public TEvalType Evaluate(
+                ReadOnlySpan position,
+                object intermediateState) =>
+                function.Evaluate(position);
+        }
+
+        #endregion
+
+        #region Instance
+
+        private readonly System.Random random;
+        private readonly int[] indices;
+
+        private readonly int[] randomOffensiveIndices;
+        private readonly int[] randomDefensiveIndices;
+
+        private readonly T[] lowerBounds;
+        private readonly T[] upperBounds;
+
+        private readonly T[] og;
+        private readonly T[] dg;
+
+        private T[] sog;
+        private T[] sdg;
+
+        private T[] nextSOG;
+        private T[] nextSDG;
+
+        private readonly int[] sogList;
+        private int sogListCounter;
+
+        private T[] positions;
+        private T[] nextPositions;
+
+        private readonly TEvalType[] evaluations;
+
+        /// <summary>
+        /// Creates a new meta optimizer instance. The number of players is raised to
+        /// at least 4 and rounded up to an even number; the number of dimensions is
+        /// padded to a multiple of <paramref name="numDimensionsPerStep"/>.
+        /// </summary>
+        /// <param name="inputRandom">The input random instance.</param>
+        /// <param name="numPlayers">The number of players.</param>
+        /// <param name="numDimensions">The dimensionality of the problem.</param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of processing threads (if any).
+        /// </param>
+        /// <param name="numDimensionsPerStep">
+        /// The number of dimension values per batched step.
+        /// </param>
+        [SuppressMessage(
+            "Design",
+            "CA1031:Do not catch general exception types",
+            Justification = "Catch is used to initialize step sizes to logical 0.5 " +
+            "which may lead to exceptions depending on the value type")]
+        protected MetaOptimizer(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads,
+            int numDimensionsPerStep)
+        {
+            if (numPlayers < 1)
+                throw new ArgumentOutOfRangeException(nameof(numPlayers));
+            if (numDimensionsPerStep < 1)
+                throw new ArgumentOutOfRangeException(nameof(numDimensionsPerStep));
+
+            // Use at least four players and an even number of them, since players
+            // are split into two halves of size M
+            numPlayers = Math.Max(numPlayers, 4);
+            numPlayers += numPlayers % 2;
+
+            NumPlayers = numPlayers;
+            MaxNumWorkers = maxNumParallelThreads.HasValue
+                ? maxNumParallelThreads.Value < 1
+                    ? Environment.ProcessorCount
+                    : maxNumParallelThreads.Value
+                : -1;
+
+            // Update the number of dimensions to ensure valid padding to multiples of
+            // the vector size. The slice count is the rounded-up quotient; adding the
+            // plain remainder would not yield a multiple of the step size (e.g. 3
+            // dimensions with a step of 8 would give 6 padded dimensions and 0 slices)
+            NumDimensions = numDimensions;
+            NumDimensionSlices =
+                (numDimensions + numDimensionsPerStep - 1) / numDimensionsPerStep;
+            NumPaddedDimensions = NumDimensionSlices * numDimensionsPerStep;
+
+            random = new System.Random(inputRandom.Next());
+
+            lowerBounds = new T[NumPaddedDimensions];
+            upperBounds = new T[NumPaddedDimensions];
+
+            og = new T[NumPaddedDimensions];
+            dg = new T[NumPaddedDimensions];
+
+            sog = new T[NumPaddedDimensions];
+            sdg = new T[NumPaddedDimensions];
+
+            nextSOG = new T[NumPaddedDimensions];
+            nextSDG = new T[NumPaddedDimensions];
+
+            M = numPlayers / 2;
+            randomOffensiveIndices = new int[M];
+            randomDefensiveIndices = new int[M];
+
+            indices = new int[numPlayers];
+            sogList = new int[numPlayers];
+            positions = new T[numPlayers * NumPaddedDimensions];
+            nextPositions = new T[numPlayers * NumPaddedDimensions];
+            evaluations = new TEvalType[numPlayers];
+
+            // Start with identity index mappings: the first M players are offensive,
+            // the remaining M players are defensive
+            for (int i = 0; i < numPlayers; ++i)
+            {
+                indices[i] = i;
+                if (i < M)
+                {
+                    randomOffensiveIndices[i] = i;
+                    randomDefensiveIndices[i] = i + M;
+                }
+            }
+
+            // Try to initialize the basic step sizes
+            try
+            {
+                var value2 = T.CreateSaturating(2);
+                DefensiveStepSize = T.One / value2;
+                OffensiveStepSize = T.One / value2;
+                OffensiveSOGStepSize = T.One / value2;
+            }
+            catch (Exception)
+            {
+                // We actually ignore the initialization of step sizes in this case
+            }
+        }
+
+        #endregion
+
+        #region Properties
+
+        ///
+
/// Returns the number of dimensions. + /// + public int NumDimensions { get; } + + /// + /// Returns the number of padded dimensions. + /// + public int NumPaddedDimensions { get; } + + /// + /// Returns the number of players. + /// + public int NumPlayers { get; } + + /// + /// Returns the number of dimensions per processing step. + /// + private int NumDimensionSlices { get; } + + /// + /// Returns the maximum number of parallel processing threads. + /// + private int MaxNumWorkers { get; } + + /// + /// Returns half the number of players (referred to as M in the scope of the SGO + /// algorithm paper). + /// + protected int M { get; } + + /// + /// Gets or sets lower bounds of this optimizer. + /// + public ReadOnlySpan LowerBounds + { + get => lowerBounds.AsSpan()[..NumDimensions]; + set + { + if (value.Length != NumDimensions) + throw new ArgumentOutOfRangeException(nameof(value)); + value.CopyTo(lowerBounds); + } + } + + /// + /// Gets or sets upper bounds of this optimizer. + /// + public ReadOnlySpan UpperBounds + { + get => upperBounds.AsSpan()[..NumDimensions]; + set + { + if (value.Length != NumDimensions) + throw new ArgumentOutOfRangeException(nameof(value)); + value.CopyTo(upperBounds); + } + } + + /// + /// Gets or sets the step size of the defensive players. + /// + public T DefensiveStepSize { get; set; } + + /// + /// Gets or sets the step size of the offensive players. + /// + public T OffensiveStepSize { get; set; } + + /// + /// Gets or sets the step size of the offensive players in the SOG. + /// + public T OffensiveSOGStepSize { get; set; } + + #endregion + + #region Methods + + /// + /// Gets the current player position memory to operate on source values in the + /// current iteration. + /// + /// The player index. + /// + /// A memory instance holding all multidimensional position information for the + /// given player. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private Memory GetPositionMemory(int playerIndex) => + positions.AsMemory( + playerIndex * NumPaddedDimensions, + NumPaddedDimensions); + + /// + /// Gets the current player position span to operate on source values in the + /// current iteration. + /// + /// The player index. + /// + /// A span holding all multidimensional position information for the given player. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe Span GetPosition(int playerIndex) + { + ref var baseRef = ref positions.AsSpan().GetItemRef( + playerIndex * NumPaddedDimensions); + return new Span(Unsafe.AsPointer(ref baseRef), NumPaddedDimensions); + } + + /// + /// Gets the next position span for value updates in the next iteration. + /// + /// The player index. + /// + /// A span holding all multidimensional position information for the given player. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe Span GetNextPosition(int playerIndex) + { + ref var baseRef = ref nextPositions.AsSpan().GetItemRef( + playerIndex * NumPaddedDimensions); + return new Span(Unsafe.AsPointer(ref baseRef), NumPaddedDimensions); + } + + /// + /// Gets the random offensive index corresponding to the given relative player + /// index. + /// + /// The relative input player index. + /// An absolute random offensive index. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int GetRandomOffensiveIndex(int playerIndex) => + randomOffensiveIndices.AsSpan().GetItemRef(playerIndex); + + /// + /// Gets the random defensive index corresponding to the given relative player + /// index. + /// + /// The relative input player index. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int GetRandomDefensiveIndex(int playerIndex) => + randomDefensiveIndices.AsSpan().GetItemRef(playerIndex); + + /// + /// Resets the contents of the two given spans. + /// + /// The processor type. 
+ /// The processing type. + /// The first span to reset. + /// The second span to reset. + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + private void Reset(Span first, Span second) + where TProcessor : struct, IProcessor + where TType : unmanaged + { + // Reset first and second vectors + var processor = TProcessor.New(); + for (int i = 0; i < NumDimensionSlices; ++i) + { + processor.Reset(out first.GetItemRef(i)); + processor.Reset(out second.GetItemRef(i)); + } + } + + /// + /// Accumulates information from the first source into the first target span and + /// from the second source into the second target span. + /// + /// The processor type. + /// The processing type. + /// The first target span to accumulate into. + /// The second target span to accumulate into. + /// + /// The first source span to get the intermediate results from. + /// + /// + /// The second source span to get the intermediate results from. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + private void Accumulate( + Span firstTarget, + Span secondTarget, + ReadOnlySpan firstSource, + ReadOnlySpan secondSource) + where TProcessor : struct, IProcessor + where TType : unmanaged + { + // Create new processor + var processor = TProcessor.New(); + + // Accumulate first and second vectors + for (int i = 0; i < NumDimensionSlices; ++i) + { + processor.Accumulate( + ref firstTarget.GetItemRef(i), + firstSource.GetItemRef(i)); + processor.Accumulate( + ref secondTarget.GetItemRef(i), + secondSource.GetItemRef(i)); + } + } + + /// + /// Computes the average position vectors based on the given first and second + /// spans holding all multidimensional information. + /// + /// The processor type. + /// The processing type. + /// The first span to compute the average for. + /// The second span to compute the average for. 
+ /// + /// The number of contributors representing the denominator of the first span. + /// + /// + /// The (optional) number of contributors representing the denominator of the + /// second span. If the number is not provided, the number will be equal to the + /// first number of contributors. + /// + [MethodImpl( + MethodImplOptions.AggressiveInlining | + MethodImplOptions.AggressiveOptimization)] + private void ComputeAverage( + Span first, + Span second, + T numContributors, + T? numContributorsSecond = null) + where TProcessor : struct, IProcessor + where TType : unmanaged + { + // Create new processor + var processor = TProcessor.New(); + + // Determine second contributors + numContributors = T.Max(numContributors, T.One); + T secondContributors = T.Max( + numContributorsSecond ?? numContributors, + T.One); + + // Iterate over all dimension slices + for (int i = 0; i < NumDimensionSlices; ++i) + { + processor.ComputeAverage(ref first.GetItemRef(i), numContributors); + processor.ComputeAverage(ref second.GetItemRef(i), secondContributors); + } + } + + /// + /// Optimize the given objective function using delegates. + /// + /// The evaluation function. + /// The break function. + /// + /// The comparison functionality comparing evaluation results. + /// + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (TEvalType Result, Memory Position) Optimize( + CPUOptimizationFunction evalFunction, + CPUOptimizationBreakFunction breakFunction, + Comparison comparison, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + { + var wrapper = new FunctionWrapper( + evalFunction, + breakFunction, + (first, second) => + comparison(first, second) >= 0); + return Optimize(wrapper, wrapper, bestResult, bestKnownPosition); + } + + /// + /// Optimize the given objective function using delegates. 
+ /// + /// The evaluation function. + /// + /// The comparison function comparing evaluation results. + /// + /// The break function. + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (TEvalType Result, Memory Position) Optimize( + CPUOptimizationFunction evalFunction, + CPUOptimizationBreakFunction breakFunction, + CPUEvaluationComparison evaluationComparison, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + { + var wrapper = new FunctionWrapper( + evalFunction, + breakFunction, + evaluationComparison); + return Optimize(wrapper, wrapper, bestResult, bestKnownPosition); + } + + /// + /// Optimize the given objective function using specialized optimization function + /// types. + /// + /// The optimization function. + /// The break function. + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. + /// + public (TEvalType Result, Memory Position) Optimize< + TFunction, + TBreakFunction>( + in TFunction optimizationFunction, + in TBreakFunction breakFunction, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + where TFunction : ICPUOptimizationFunction + where TBreakFunction : ICPUOptimizationBreakFunction => + Optimize( + optimizationFunction, + breakFunction, + CPUPositionModifier.GetNop(), + bestResult, + bestKnownPosition); + + /// + /// Optimize the given objective function using specialized optimization function + /// types. + /// + /// The optimization function. + /// The break function. + /// + /// The position modifier to apply to all position updates during optimization. + /// + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. 
+ /// + public (TEvalType Result, Memory Position) Optimize< + TFunction, + TBreakFunction, + TModifier>( + in TFunction optimizationFunction, + in TBreakFunction breakFunction, + in TModifier positionModifier, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + where TFunction : ICPUOptimizationFunction + where TBreakFunction : ICPUOptimizationBreakFunction + where TModifier : ICPUPositionModifier + { + var cachedFunctionWrapper = new CachedOptimizationFunction( + optimizationFunction); + return Optimize< + CachedOptimizationFunction, + object, + TBreakFunction, + TModifier>( + cachedFunctionWrapper, + breakFunction, + positionModifier, + bestResult, + bestKnownPosition); + } + + /// + /// Optimize the given objective function using specialized optimization function + /// types. + /// + /// The optimization function type. + /// + /// The intermediate optimization state type. + /// + /// The break function type. + /// The position modifier type. + /// The optimization function. + /// The break function. + /// + /// The position modifier to apply to all position updates during optimization. + /// + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. + /// + public abstract (TEvalType Result, Memory Position) Optimize< + TFunction, + TIntermediate, + TBreakFunction, + TModifier>( + in TFunction optimizationFunction, + in TBreakFunction breakFunction, + in TModifier positionModifier, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default) + where TFunction : ICPUOptimizationFunction + where TIntermediate : class + where TBreakFunction : ICPUOptimizationBreakFunction + where TModifier : ICPUPositionModifier; + + /// + /// Optimize the given objective function using specialized optimization function + /// types. 
This overload uses raw optimization function callbacks to implement + /// extremely customizable optimization functions on top of the current stack. + /// + /// The optimization function. + /// The break function. + /// + /// The comparison function comparing evaluation results. + /// + /// The best known input result. + /// The best known position span. + /// + /// A tuple consisting of the best found result and position vector. + /// + public abstract (TEvalType Result, Memory Position) OptimizeRaw( + RawCPUOptimizationFunction optimizationFunction, + CPUOptimizationBreakFunction breakFunction, + CPUEvaluationComparison evaluationComparison, + TEvalType bestResult, + ReadOnlyMemory? bestKnownPosition = default); + + /// + /// Copies all current positions to all next positions. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void CopyPositions() + { + var positionsSpans = positions.AsSpan(); + var nextPositionsSpan = nextPositions.AsSpan(); + positionsSpans.CopyTo(nextPositionsSpan); + } + + /// + /// Permutes internal index arrays. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Permute() + { + for (int i = NumPlayers - 1; i > 1; --i) + { + int j = random.Next(i + 1); + Utilities.Swap(ref indices[i], ref indices[j]); + } + + for (int i = M - 1; i > 1; --i) + { + int j = random.Next(i + 1); + Utilities.Swap( + ref randomOffensiveIndices[i], + ref randomOffensiveIndices[j]); + + int k = random.Next(i + 1); + Utilities.Swap( + ref randomDefensiveIndices[i], + ref randomDefensiveIndices[k]); + } + } + + /// + /// Initializes the internal SOG list for the current iteration. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void InitSOGList() + { + // Reset SOG list + Interlocked.Exchange(ref sogListCounter, 0); +#if DEBUG + Array.Clear(sogList); +#endif + } + + /// + /// Swaps all intermediate buffers for the next iteration. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void SwapBuffers() + { + // Swap current and next positions + Utilities.Swap(ref positions, ref nextPositions); + + // Swap current SOG and SDG vectors + Utilities.Swap(ref sog, ref nextSOG); + Utilities.Swap(ref sdg, ref nextSDG); + } + + #endregion + } +} + +#endif diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs b/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs new file mode 100644 index 0000000000..9801b56223 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs @@ -0,0 +1,142 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: OptimizationFunction.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU.Util; +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace ILGPU.Algorithms.Optimization.CPU +{ + /// + /// Represents a generic optimization function to be used with CPU-specific parts + /// of the optimization library. + /// + /// The main element type for all position vectors. + /// The evaluation data type. + public delegate TEvalType CPUOptimizationFunction( + ReadOnlySpan position) + where T : struct + where TEvalType : struct, IEquatable; + + /// + /// A raw optimization function operating on all positions and evaluation values + /// directly to implement specialized and highly domain-specific evaluators. + /// + /// The main element type for all position vectors. + /// The evaluation data type. + /// + /// A memory instance pointing to all packed position vectors of all particles. + /// + /// + /// A memory instance pointing to all evaluation values of all particles. 
+ /// + /// The number of dimensions. + /// + /// The number of padded dimensions taking vectorization into account. + /// + /// The number of particles. + /// + /// The position stride to be used to compute individual vector elements. In this + /// scope, the X dimension refers to the number of players and the Y dimension + /// is equal to the number of padded dimensions. + /// + /// + /// Parallel processing options to be used if further parallel processing is desired. + /// + public delegate void RawCPUOptimizationFunction( + Memory allPositions, + Memory evaluations, + int numDimensions, + int numPaddedDimensions, + int numParticles, + Stride2D.DenseY positionStride, + ParallelOptions options); + + /// + /// A custom break function to break the optimization loop at some point. Returns + /// true if the optimization loop should be stopped. + /// + /// The evaluation data type. + public delegate bool CPUOptimizationBreakFunction( + TEvalType evalType, + int iteration); + + /// + /// Represents a comparison function operating on evaluation types. If the first + /// value is considered to be better than the second one, true will be returned by + /// this function. + /// + /// The evaluation data type. + public delegate bool CPUEvaluationComparison( + TEvalType first, + TEvalType second); + + /// + /// An abstract optimization function to be used with CPU-specific optimizers. + /// + /// The main element type for all position vectors. + /// The evaluation data type. + public interface ICPUOptimizationFunction : + IBaseOptimizationFunction + where T : struct + where TEvalType : struct, IEquatable + { + /// + /// Evaluates the given position vector. + /// + /// The position span. + /// The resulting evaluation value. + TEvalType Evaluate(ReadOnlySpan position); + } + + /// + /// An abstract optimization function to be used with CPU-specific optimizers. + /// + /// The main element type for all position vectors. + /// The evaluation data type. 
+ /// + /// The type of all intermediate states during processing. + /// + public interface ICPUOptimizationFunction : + IBaseOptimizationFunction, + IParallelCache + where T : struct + where TIntermediate : class + where TEvalType : struct, IEquatable + { + /// + /// Evaluates the given position vector. + /// + /// The position span. + /// The intermediate processing state. + /// The resulting evaluation value. + TEvalType Evaluate(ReadOnlySpan position, TIntermediate intermediateState); + } + + /// + /// An abstract optimizer break logic to realize custom iteration logic. + /// + /// The evaluation data type. + public interface ICPUOptimizationBreakFunction + where TEvalType : struct + { + /// + /// Tests the given evaluation type and the current iteration to enable the + /// implementation of custom optimizer break functionality and returns true if + /// the current optimizer process should be terminated. + /// + /// The best found evaluation result so far. + /// The current solver iteration. + /// True if the current solver iteration should be terminated. + bool Break(TEvalType evalType, int iteration); + } +} diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs b/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs new file mode 100644 index 0000000000..1cc3f41f44 --- /dev/null +++ b/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs @@ -0,0 +1,162 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Algorithms +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: PositionModifier.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// --------------------------------------------------------------------------------------- + +using System; +using System.Runtime.CompilerServices; + +namespace ILGPU.Algorithms.Optimization.CPU +{ + /// + /// Represents an abstract modifier for player/particle positions during optimization. + /// This allows users to implement specific clamping, rounding, or adjustments + /// during an optimization run. + /// + /// The element type. + public interface ICPUPositionModifier + where T : unmanaged + { + /// + /// Adjusts the given player/particle position according to user- and domain- + /// specific constraints. + /// + /// The current player/particle index. + /// The position to adjust (if desired). + /// The raw dimensions of the input problem. + /// The padded number of dimensions. + /// + /// The length of the position memory will be equal to the input problem + /// dimension in case of a scalar optimizer. If the optimizer has been created + /// for vector-based execution, the position memory length will be padded + /// according to the vector length. If you want to use vector instructions inside + /// this function, make sure to create a vectorized optimizer or account for + /// non-optimized memory lengths. + /// + void AdjustPosition( + int index, + Memory position, + int numDimensions, + int numPaddedDimensions); + } + + /// + /// Static utility class for interfaces. + /// + public static class CPUPositionModifier + { + /// + /// Represents a nop position modifier. + /// + /// The element type. + public readonly struct Nop : ICPUPositionModifier + where T : unmanaged + { + /// + /// Does not perform any operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AdjustPosition( + int index, + Memory position, + int numDimensions, + int numPaddedDimensions) + { } + } + + /// + /// Rounds floating point values according to the given number of digits. + /// + /// The number of digits to round to. + /// The midpoint rounding mode. 
+ public readonly record struct FloatRoundingModifier( + int NumDigits, + MidpointRounding MidpointRounding) : + ICPUPositionModifier + { + /// + /// Rounds the given position according to the specified number of digits. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AdjustPosition( + int index, + Memory position, + int numDimensions, + int numPaddedDimensions) + { + var span = position.Span; + for (int i = 0; i < numDimensions; ++i) + span[i] = XMath.Round(span[i], NumDigits, MidpointRounding); + } + } + + /// + /// Rounds floating point values according to the given number of digits. + /// + /// The number of digits to round to. + /// The midpoint rounding mode. + public readonly record struct DoubleRoundingModifier( + int NumDigits, + MidpointRounding MidpointRounding) : + ICPUPositionModifier + { + /// + /// Rounds the given position according to the specified number of digits. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AdjustPosition( + int index, + Memory position, + int numDimensions, + int numPaddedDimensions) + { + var span = position.Span; + for (int i = 0; i < numDimensions; ++i) + span[i] = XMath.Round(span[i], NumDigits, MidpointRounding); + } + } + + /// + /// Returns a new no-operation CPU position modifier. + /// + /// The element type. + /// A new Nop position modifier. + public static Nop GetNop() where T : unmanaged => new(); + + /// + /// Returns a new float rounding modifier. + /// + /// The number of digits to round to. + /// The midpoint rounding mode. + /// A new rounding modifier. + public static FloatRoundingModifier GetFloatRounding( + int numDigits, + MidpointRounding midpointRounding = MidpointRounding.ToEven) + { + if (numDigits < 0) + throw new ArgumentOutOfRangeException(nameof(numDigits)); + return new(numDigits, midpointRounding); + } + + /// + /// Returns a new double rounding modifier. + /// + /// The number of digits to round to. + /// The midpoint rounding mode. 
+ /// A new rounding modifier. + public static DoubleRoundingModifier GetDoubleRounding( + int numDigits, + MidpointRounding midpointRounding = MidpointRounding.ToEven) + { + if (numDigits < 0) + throw new ArgumentOutOfRangeException(nameof(numDigits)); + return new(numDigits, midpointRounding); + } + } +} \ No newline at end of file diff --git a/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs b/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs index d5e2ad0b12..bbaf7245be 100644 --- a/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs +++ b/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs @@ -13,10 +13,28 @@ using System; using System.Numerics; -#if NET7_0_OR_GREATER - namespace ILGPU.Algorithms.Optimization { + /// + /// An abstract optimization function supporting comparisons between evaluation types. + /// + /// The evaluation data type. + public interface IBaseOptimizationFunction + where TEvalType : struct, IEquatable + { + /// + /// Compares the current evaluation value with the proposed one and returns true + /// if the current one is considered better in any way. + /// + /// The currently known value. + /// The proposed evaluation value. + /// + /// True if the current value is considered better than the proposed value. + /// + bool CurrentIsBetter(TEvalType current, TEvalType proposed); + } + +#if NET7_0_OR_GREATER /// /// A generic optimization function that defines the objective of an optimization /// process using evaluation and comparison methods. @@ -24,7 +42,8 @@ namespace ILGPU.Algorithms.Optimization /// The vectorized numeric type. /// The element type of a numeric type. /// The evaluation data type. 
- public interface IOptimizationFunction + public interface IOptimizationFunction : + IBaseOptimizationFunction where TNumericType : unmanaged, IVectorType where TElementType : unmanaged, INumber where TEvalType : unmanaged, IEquatable @@ -43,18 +62,7 @@ TEvalType Evaluate( LongIndex1D index, Index1D dimension, SingleVectorView positionView); - - /// - /// Compares the current evaluation value with the proposed one and returns true - /// if the current one is considered better in any way. - /// - /// The currently known value. - /// The proposed evaluation value. - /// - /// True if the current value is considered better than the proposed value. - /// - bool CurrentIsBetter(TEvalType current, TEvalType proposed); } +#endif } -#endif diff --git a/Src/ILGPU/Util/ParallelCache.cs b/Src/ILGPU/Util/ParallelCache.cs new file mode 100644 index 0000000000..ecb4adbb96 --- /dev/null +++ b/Src/ILGPU/Util/ParallelCache.cs @@ -0,0 +1,378 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: ParallelCache.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; + +namespace ILGPU.Util +{ + /// + /// Represents a parallel object cache to be used in combination with a + /// for implementation to avoid unnecessary temporary object + /// creation. + /// + /// The type of the elements to cache. + public abstract class ParallelCache : DisposeBase, IParallelCache + where T : class + { + #region Instance + + private InlineList cache; + private InlineList used; + + /// + /// Creates a new parallel cache. 
+ /// + /// + /// The initial number of processing threads (if any). + /// + protected ParallelCache(int? initialCapacity = null) + { + int capacity = initialCapacity ?? Environment.ProcessorCount * 2; + cache = InlineList.Create(capacity); + used = InlineList.Create(capacity); + + LocalInitializer = GetOrCreate; + LocalFinalizer = FinishProcessing; + } + + #endregion + + #region Properties + + /// + /// Returns the underlying sync root object. + /// + public object SyncRoot { get; } = new object(); + + /// + /// Returns the local initializer function. + /// + public Func LocalInitializer { get; } + + /// + /// Returns the local finalizer action. + /// + public Action LocalFinalizer { get; } + + /// + /// Returns the underlying used intermediates. + /// + protected ReadOnlySpan Used => used; + + #endregion + + #region Methods + + /// + /// Initializes this parallel cache of the next parallel operation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void InitializeProcessing() + { + // This method does not perform an operation at the moment but this may + // change in the future. For this reason, this (empty) method remains here + // and should be called in all cases prior to calling GetOrCreate(). + } + + /// + /// Gets or creates a new intermediate array tuple storing information for the + /// upcoming optimizer iteration. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T GetOrCreate() + { + // Checks the cache contents to retrieve previously + T intermediate; + lock (SyncRoot) + { + if (cache.Count > 0) + { + int lastIndex = cache.Count - 1; + intermediate = cache[lastIndex]; + cache.RemoveAt(lastIndex); + } + else + { + // Create a new intermediate result + intermediate = CreateIntermediate(); + } + } + + // Initialize intermediate result and return + InitializeIntermediate(intermediate); + + // Add to our list of used intermediates + lock (SyncRoot) + used.Add(intermediate); + + return intermediate; + } + + /// + /// Finishes a parallel processing step. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FinishProcessing() + { + // Return all used intermediates to the cache + cache.AddRange(used); + used.Clear(); + + } + + /// + /// Creates a new intermediate instance without initializing it properly. + /// + /// The created intermediate state. + protected abstract T CreateIntermediate(); + + /// + /// Initializes the given intermediate state in order to prepare it for + /// processing. + /// + /// The intermediate state to prepare. + protected virtual void InitializeIntermediate(T intermediateState) { } + + /// + /// Finishes processing of the current thread while getting an intermediate state. + /// + /// The intermediate state to operate on. + protected virtual void FinishProcessing(T intermediateState) { } + + #endregion + + #region IParallelCache + + /// + /// Creates a new intermediate instance without initializing it properly. + /// + /// The created intermediate state. + T IParallelCache.CreateIntermediate() => CreateIntermediate(); + + /// + /// Initializes the given intermediate state in order to prepare it for + /// processing. + /// + /// The intermediate state to prepare. 
+        void IParallelCache<T>.InitializeIntermediate(T intermediateState) =>
+            InitializeIntermediate(intermediateState);
+
+        /// <summary>
+        /// Finishes processing of the current thread while getting an intermediate state.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to operate on.</param>
+        void IParallelCache<T>.FinishProcessing(T intermediateState) =>
+            FinishProcessing(intermediateState);
+
+        #endregion
+
+        #region IDisposable
+
+        /// <summary>
+        /// Disposes all created intermediate states (if required).
+        /// </summary>
+        /// <param name="disposing">
+        /// True, if this method is invoked by an explicit dispose call. Intermediate
+        /// states are managed objects and are only disposed in this case.
+        /// </param>
+        protected override void Dispose(bool disposing)
+        {
+            // Check whether we need to dispose all elements
+            if (disposing && typeof(IDisposable).IsAssignableFrom(typeof(T)))
+            {
+                lock (SyncRoot)
+                {
+                    // Dispose cached states as well as states that are still
+                    // registered as used (e.g. after an aborted processing step)
+                    foreach (var intermediateState in cache)
+                        (intermediateState as IDisposable)?.Dispose();
+                    foreach (var intermediateState in used)
+                        (intermediateState as IDisposable)?.Dispose();
+
+                    // Do not keep references to disposed instances
+                    cache.Clear();
+                    used.Clear();
+                }
+            }
+            base.Dispose(disposing);
+        }
+
+        #endregion
+    }
+
+    /// <summary>
+    /// An abstract parallel cache interface operating on intermediate states.
+    /// </summary>
+    /// <typeparam name="T">The type of all intermediate states.</typeparam>
+    public interface IParallelCache<T>
+    {
+        /// <summary>
+        /// Creates a new intermediate instance without initializing it properly.
+        /// </summary>
+        /// <returns>The created intermediate state.</returns>
+        T CreateIntermediate();
+
+        /// <summary>
+        /// Initializes the given intermediate state in order to prepare it for
+        /// processing.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to prepare.</param>
+        void InitializeIntermediate(T intermediateState);
+
+        /// <summary>
+        /// Finishes processing of the current thread while getting an intermediate state.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to operate on.</param>
+        void FinishProcessing(T intermediateState);
+    }
+
+    /// <summary>
+    /// An abstract parallel processing body representing a function to be executed
+    /// concurrently on a given value range. It operates on intermediate values that are
+    /// managed by its surrounding processing cache.
+    /// </summary>
+    /// <typeparam name="T">The type of all intermediate states.</typeparam>
+    public interface IParallelProcessingBody<T>
+        where T : class
+    {
+        /// <summary>
+        /// Initializes this processing body to prepare the upcoming parallel processing
+        /// steps.
+        /// </summary>
+        void Initialize();
+
+        /// <summary>
+        /// Processes a single element concurrently while accepting an intermediate state
+        /// on which this body operates on.
+        /// </summary>
+        /// <param name="index">The current processing element index.</param>
+        /// <param name="loopState">The parallel loop state (if any).</param>
+        /// <param name="intermediateState">
+        /// The current intermediate state for this thread.
+        /// </param>
+        void Process(
+            int index,
+            ParallelLoopState? loopState,
+            T intermediateState);
+
+        /// <summary>
+        /// Finalizes the current body operating while having the ability to inspect all
+        /// previously used intermediate states.
+        /// </summary>
+        /// <param name="intermediateStates">
+        /// A span referring to all previously used intermediate states.
+        /// </param>
+        void Finalize(ReadOnlySpan<T> intermediateStates);
+    }
+
+    /// <summary>
+    /// Static helpers for parallel processing extensions.
+    /// </summary>
+    public static class ParallelProcessing
+    {
+        /// <summary>
+        /// Gets or sets whether debug mode is enabled. Note that this assignment needs to
+        /// be changed before the first <see cref="ParallelProcessingCache{T,TBody}"/>
+        /// instance has been created since the flag is cached locally to enable JIT
+        /// optimizations.
+        /// </summary>
+        public static bool DebugMode { get; set; }
+    }
+
+    /// <summary>
+    /// Represents a parallel object cache to be used in combination with a
+    /// <see cref="Parallel"/> for implementation to avoid unnecessary temporary object
+    /// creation. Furthermore, this implementation operates on specialized body instances
+    /// to avoid virtual function calls in each processing step.
+    /// </summary>
+    /// <typeparam name="T">The type of the elements to cache.</typeparam>
+    /// <typeparam name="TBody">The type of the custom loop body instance.</typeparam>
+    public abstract class ParallelProcessingCache<T, TBody> : ParallelCache<T>
+        where T : class
+        where TBody : IParallelProcessingBody<T>
+    {
+        /// <summary>
+        /// Returns true if the debug mode is enabled for all parallel processing
+        /// operations.
+        /// </summary>
+        private static readonly bool DebugMode = ParallelProcessing.DebugMode;
+
+        // Delegate handed to Parallel.For; forwards to the body implementation and
+        // passes the intermediate state through unchanged
+        private readonly Func<int, ParallelLoopState?, T, T> body;
+        private readonly TBody bodyImplementation;
+        private readonly ParallelOptions defaultOptions = new();
+
+        /// <summary>
+        /// Creates a new parallel processing cache operating on intermediate states.
+        /// </summary>
+        /// <param name="initialCapacity">
+        /// The initial number of processing threads (if any).
+        /// </param>
+        [SuppressMessage(
+            "Usage",
+            "CA2214:Do not call overridable methods in constructors",
+            Justification = "This method is called here as it represents an abstract " +
+            "static factory method")]
+        protected ParallelProcessingCache(int? initialCapacity = null)
+            : base(initialCapacity)
+        {
+            bodyImplementation = CreateBody();
+            body = (i, state, intermediate) =>
+            {
+                bodyImplementation.Process(i, state, intermediate);
+                return intermediate;
+            };
+        }
+
+        /// <summary>
+        /// Creates the required parallel processing body to be used.
+        /// </summary>
+        /// <returns>The processing body to use.</returns>
+        protected abstract TBody CreateBody();
+
+        /// <summary>
+        /// Performs the current operation in parallel.
+        /// </summary>
+        /// <param name="fromInclusive">The inclusive start index.</param>
+        /// <param name="toExclusive">The exclusive end index.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void ParallelFor(int fromInclusive, int toExclusive) =>
+            ParallelFor(fromInclusive, toExclusive, defaultOptions);
+
+        /// <summary>
+        /// Performs the current operation in parallel.
+        /// </summary>
+        /// <param name="fromInclusive">The inclusive start index.</param>
+        /// <param name="toExclusive">The exclusive end index.</param>
+        /// <param name="options">The parallel execution options.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void ParallelFor(
+            int fromInclusive,
+            int toExclusive,
+            ParallelOptions options)
+        {
+            // Initialize processing cache
+            InitializeProcessing();
+
+            try
+            {
+                // Initialize operation
+                bodyImplementation.Initialize();
+
+                // Check for enabled debug mode
+                if (DebugMode)
+                {
+                    // Process all elements sequentially on the calling thread using
+                    // a single intermediate state
+                    var intermediate = GetOrCreate();
+                    for (int i = fromInclusive; i < toExclusive; ++i)
+                        body(i, null, intermediate);
+
+                    // Run the same per-thread finalizer that Parallel.For invokes
+                    // in the non-debug path to keep both modes consistent
+                    LocalFinalizer(intermediate);
+                }
+                else
+                {
+                    Parallel.For(
+                        fromInclusive,
+                        toExclusive,
+                        options,
+                        LocalInitializer,
+                        body,
+                        LocalFinalizer);
+                }
+
+                // Finalize operation
+                bodyImplementation.Finalize(Used);
+            }
+            finally
+            {
+                // Always hand the used intermediate states back to the cache, even
+                // if the body has thrown an exception
+                FinishProcessing();
+            }
+        }
+    }
+}