diff --git a/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs b/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs
new file mode 100644
index 0000000000..b19765fc4a
--- /dev/null
+++ b/Src/ILGPU.Algorithms.Tests.CPU/CPUMetaOptimizerTests.cs
@@ -0,0 +1,330 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: CPUMetaOptimizerTests.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Optimization.CPU;
+using ILGPU.Algorithms.Random;
+using System;
+using System.Threading.Tasks;
+using Xunit;
+
+#if NET7_0_OR_GREATER
+
+#pragma warning disable CA1034 // Do not nest types
+#pragma warning disable CA1819 // Properties should not return arrays
+
+namespace ILGPU.Algorithms.Tests.CPU
+{
+    /// <summary>
+    /// Contains tests to verify the functionality of the CPU-specialized
+    /// <see cref="MetaOptimizer{T,TEvalType}"/> class.
+    /// </summary>
+    public class CPUMetaOptimizerTests
+    {
+        #region CPU Functions
+
+        /// <summary>CPU objective that also exposes predefined test data.</summary>
+        public interface IOptimizerTestFunction
+            : OptimizationTests.IPredefineTestFunction,
+              ICPUOptimizationFunction<float, float> { }
+
+        public readonly record struct TestBreakFunction(float Goal)
+            : ICPUOptimizationBreakFunction<float>
+        {   // Stops within 1e-4 of the goal or as soon as the iteration value exceeds 500
+            public bool Break(float evalType, int iteration) =>
+                iteration > 500 || MathF.Abs(evalType - Goal) < 1e-4f;
+        }
+
+        /// <summary>
+        /// CPU variant of the Himmelblau function; evaluation, result and bounds are
+        /// forwarded to <see cref="OptimizationTests.HimmelblauFunction"/>. See also:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct HimmelblauFunction : IOptimizerTestFunction
+        {
+            public float Result => new OptimizationTests.HimmelblauFunction().Result;
+            public float[] LowerBounds =>
+                new OptimizationTests.HimmelblauFunction().LowerBounds;
+            public float[] UpperBounds =>
+                new OptimizationTests.HimmelblauFunction().UpperBounds;
+
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+
+            public float Evaluate(ReadOnlySpan<float> position) =>
+                OptimizationTests.HimmelblauFunction.Evaluate(
+                    x: position[0],
+                    y: position[1]);
+        }
+
+        /// <summary>
+        /// CPU variant of the Easom function; evaluation, result and bounds are
+        /// forwarded to <see cref="OptimizationTests.EasomFunction"/>. See also:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct EasomFunction : IOptimizerTestFunction
+        {
+            public float Result => new OptimizationTests.EasomFunction().Result;
+            public float[] LowerBounds =>
+                new OptimizationTests.EasomFunction().LowerBounds;
+            public float[] UpperBounds =>
+                new OptimizationTests.EasomFunction().UpperBounds;
+
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+
+            public float Evaluate(ReadOnlySpan<float> position) =>
+                OptimizationTests.EasomFunction.Evaluate(
+                    x: position[0],
+                    y: position[1]);
+        }
+        /// <summary>
+        /// CPU variant of the Schaffer function N4; evaluation, result and bounds are
+        /// forwarded to <see cref="OptimizationTests.ShafferFunction4"/>. See also:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct ShafferFunction4 : IOptimizerTestFunction
+        {
+            public float Result => new OptimizationTests.ShafferFunction4().Result;
+            public float[] LowerBounds =>
+                new OptimizationTests.ShafferFunction4().LowerBounds;
+            public float[] UpperBounds =>
+                new OptimizationTests.ShafferFunction4().UpperBounds;
+
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+
+            public float Evaluate(ReadOnlySpan<float> position) =>
+                OptimizationTests.ShafferFunction4.Evaluate(
+                    x: position[0],
+                    y: position[1]);
+        }
+
+        /// <summary>
+        /// CPU variant of the disk-constrained Rosenbrock function; evaluation, result
+        /// and bounds are forwarded to <see cref="OptimizationTests.RosenbrockDisk"/>:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct RosenbrockDisk : IOptimizerTestFunction
+        {
+            public float Result => new OptimizationTests.RosenbrockDisk().Result;
+            public float[] LowerBounds =>
+                new OptimizationTests.RosenbrockDisk().LowerBounds;
+            public float[] UpperBounds =>
+                new OptimizationTests.RosenbrockDisk().UpperBounds;
+
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+
+            public float Evaluate(ReadOnlySpan<float> position) =>
+                OptimizationTests.RosenbrockDisk.Evaluate(
+                    x: position[0],
+                    y: position[1]);
+        }
+
+        /// <summary>
+        /// CPU variant of the Gomez and Levy function; evaluation, result and bounds
+        /// are forwarded to <see cref="OptimizationTests.GomezAndLevyFunction"/>:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct GomezAndLevyFunction : IOptimizerTestFunction
+        {
+            public float Result => new OptimizationTests.GomezAndLevyFunction().Result;
+            public float[] LowerBounds =>
+                new OptimizationTests.GomezAndLevyFunction().LowerBounds;
+            public float[] UpperBounds =>
+                new OptimizationTests.GomezAndLevyFunction().UpperBounds;
+
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+
+            public float Evaluate(ReadOnlySpan<float> position) =>
+                OptimizationTests.GomezAndLevyFunction.Evaluate(
+                    x: position[0],
+                    y: position[1]);
+        }
+
+        #endregion
+
+        #region MemberData
+
+        /// <summary>
+        /// Gets the parameter rows shared by all theories of this class.
+        /// </summary>
+        /// <remarks>
+        /// A row holds the objective function instance, the number of particles and
+        /// the defensive, offensive and offensive SOG step sizes, matching the
+        /// parameter order of the test methods. The Easom and the Gomez and Levy
+        /// objectives use ten times as many particles as the other objectives; all
+        /// step sizes are 0.5.
+        /// </remarks>
+        public static TheoryData<object, object, object, object, object> TestData =>
+            new()
+            {
+                { new HimmelblauFunction(), 8192, 0.5f, 0.5f, 0.5f },
+                { new EasomFunction(), 81920, 0.5f, 0.5f, 0.5f },
+                { new ShafferFunction4(), 8192, 0.5f, 0.5f, 0.5f },
+                { new RosenbrockDisk(), 8192, 0.5f, 0.5f, 0.5f },
+                { new GomezAndLevyFunction(), 81920, 0.5f, 0.5f, 0.5f },
+            };
+
+        #endregion
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void MetaOptimizationScalar<TObjective>(
+            TObjective objective,
+            int numParticles,
+            float stepSizeDefensive,
+            float stepSizeOffensive,
+            float stepSizeOffensiveSOG)
+            where TObjective : struct, IOptimizerTestFunction
+        {
+            // Seeded RNG and problem size (one dimension per bound entry)
+            var rng = new System.Random(13377331);
+            int dimensions = objective.LowerBounds.Length;
+
+            using var optimizer = MetaOptimizer.CreateScalar<
+                float,
+                float,
+                RandomRanges.RandomRangeFloatProvider<XorShift64Star>>(
+                rng,
+                numParticles,
+                dimensions);
+
+            // Search domain and step sizes of the current test case
+            optimizer.LowerBounds = objective.LowerBounds;
+            optimizer.UpperBounds = objective.UpperBounds;
+            optimizer.DefensiveStepSize = stepSizeDefensive;
+            optimizer.OffensiveStepSize = stepSizeOffensive;
+            optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG;
+
+            // Optimize using the goal-based break function
+            var stopCriterion = new TestBreakFunction(objective.Result);
+            var best = optimizer.Optimize(objective, stopCriterion, float.MaxValue);
+
+            // Although 1e-6 is achievable in principle, parallel processing makes the
+            // RNG-driven results non-deterministic. We therefore settle for a tolerance
+            // of 1e-2, which only verifies that the result ends up in the same ballpark
+            // as the expected one.
+            float error = Math.Abs(best.Result - objective.Result);
+            Assert.True(error < 1e-2f);
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void MetaOptimizationVectorized<TObjective>(
+            TObjective objective,
+            int numParticles,
+            float stepSizeDefensive,
+            float stepSizeOffensive,
+            float stepSizeOffensiveSOG)
+            where TObjective : struct, IOptimizerTestFunction
+        {
+            // Seeded RNG and problem size (one dimension per bound entry)
+            var rng = new System.Random(13377331);
+            int dimensions = objective.LowerBounds.Length;
+
+            using var optimizer = MetaOptimizer.CreateVectorized<
+                float,
+                float,
+                RandomRanges.RandomRangeFloatProvider<XorShift64Star>>(
+                rng,
+                numParticles,
+                dimensions);
+
+            // Search domain and step sizes of the current test case
+            optimizer.LowerBounds = objective.LowerBounds;
+            optimizer.UpperBounds = objective.UpperBounds;
+            optimizer.DefensiveStepSize = stepSizeDefensive;
+            optimizer.OffensiveStepSize = stepSizeOffensive;
+            optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG;
+
+            // Optimize using the goal-based break function
+            var stopCriterion = new TestBreakFunction(objective.Result);
+            var best = optimizer.Optimize(objective, stopCriterion, float.MaxValue);
+
+            // Although 1e-6 is achievable in principle, parallel processing makes the
+            // RNG-driven results non-deterministic. We therefore settle for a tolerance
+            // of 1e-2, which only verifies that the result ends up in the same ballpark
+            // as the expected one.
+            float error = Math.Abs(best.Result - objective.Result);
+            Assert.True(error < 1e-2f);
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void MetaOptimizationScalarRaw<TObjective>(
+            TObjective objective,
+            int numParticles,
+            float stepSizeDefensive,
+            float stepSizeOffensive,
+            float stepSizeOffensiveSOG)
+            where TObjective : struct, IOptimizerTestFunction
+        {
+            var rng = new System.Random(13377331);
+            int dimensions = objective.LowerBounds.Length;
+
+            using var optimizer = MetaOptimizer.CreateScalar<
+                float,
+                float,
+                RandomRanges.RandomRangeFloatProvider<XorShift64Star>>(
+                rng,
+                numParticles,
+                dimensions);
+
+            // Search domain and step sizes of the current test case
+            optimizer.LowerBounds = objective.LowerBounds;
+            optimizer.UpperBounds = objective.UpperBounds;
+            optimizer.DefensiveStepSize = stepSizeDefensive;
+            optimizer.OffensiveStepSize = stepSizeOffensive;
+            optimizer.OffensiveSOGStepSize = stepSizeOffensiveSOG;
+
+            // Raw callback: evaluates all particles one after another on the caller
+            void EvaluatePosition(
+                Memory<float> allPositions,
+                Memory<float> evaluations,
+                int _,
+                int numPaddedDimensions,
+                int __,
+                Stride2D.DenseY positionStride,
+                ParallelOptions options)
+            {
+                var values = evaluations.Span;
+                for (int particle = 0; particle < numParticles; ++particle)
+                {
+                    int begin = positionStride.ComputeElementIndex((particle, 0));
+                    int end = positionStride.ComputeElementIndex(
+                        (particle, numPaddedDimensions));
+                    var position = allPositions.Slice(begin, end - begin);
+                    values[particle] = objective.Evaluate(position.Span);
+                }
+            }
+
+            var stopCriterion = new TestBreakFunction(objective.Result);
+            var best = optimizer.OptimizeRaw(
+                EvaluatePosition,
+                stopCriterion.Break,
+                objective.CurrentIsBetter,
+                float.MaxValue);
+
+            // Although 1e-6 is achievable in principle, parallel processing makes the
+            // RNG-driven results non-deterministic. A tolerance of 1e-2 only verifies
+            // that we end up in the same ballpark as the expected result.
+            float error = Math.Abs(best.Result - objective.Result);
+            Assert.True(error < 1e-2f);
+        }
+    }
+}
+
+#pragma warning restore CA1819
+#pragma warning restore CA1034
+
+#endif
diff --git a/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs b/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs
index 7ab8acbbe3..1d70ad9292 100644
--- a/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs
+++ b/Src/ILGPU.Algorithms.Tests/OptimizationTests.cs
@@ -20,6 +20,7 @@
 using ILGPU.Tests;
 using System.Linq;
 using System.Numerics;
+using System.Runtime.CompilerServices;
 using Xunit;
 using Xunit.Abstractions;
 
@@ -34,11 +35,18 @@ public abstract partial class OptimizationTests : TestBase
         protected OptimizationTests(ITestOutputHelper output, TestContext testContext)
             : base(output, testContext)
         { }
-        
+
 #if NET7_0_OR_GREATER
-        
+
         #region Objectives
-        
+
+        public interface IPredefineTestFunction
+        {
+            float Result { get; }        // expected optimal value of the objective
+            float[] LowerBounds { get; } // lower bound per dimension
+            float[] UpperBounds { get; } // upper bound per dimension
+        }
+
         public readonly record struct DistanceF32x2(float Constant) :
             IOptimizationFunction<Float32x2, float, float>
         {
@@ -60,7 +68,271 @@ public float Evaluate(
             public bool CurrentIsBetter(float current, float proposed) =>
                 current <= proposed;
         }
-        
+
+        /// <summary>
+        /// The two-dimensional Himmelblau objective, bounded by [-5, 5] on both axes:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct HimmelblauFunction :
+            IOptimizationFunction<Float32x2, float, float>,
+            IPredefineTestFunction
+        {
+            private static readonly float[] GlobalLowerBounds = { -5.0f, -5.0f };
+            private static readonly float[] GlobalUpperBounds = { 5.0f, 5.0f };
+
+            /// <summary>
+            /// The expected optimum of the function.
+            /// </summary>
+            public const float GlobalResult = 0.0f;
+
+            /// <summary>
+            /// Computes (x^2 + y - 11)^2 + (x + y^2 - 7)^2.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float Evaluate(float x, float y)
+            {
+                float a = x * x + y - 11;
+                float b = x + y * y - 7;
+                return a * a + b * b;
+            }
+
+            /// <summary>Returns <see cref="GlobalResult"/>.</summary>
+            public float Result => GlobalResult;
+
+            /// <summary>Returns the shared lower bounds array.</summary>
+            public float[] LowerBounds => GlobalLowerBounds;
+
+            /// <summary>Returns the shared upper bounds array.</summary>
+            public float[] UpperBounds => GlobalUpperBounds;
+
+            /// <summary>Evaluates the first vector of the position view.</summary>
+            public float Evaluate(
+                LongIndex1D index,
+                Index1D dimension,
+                SingleVectorView<Float32x2> positionView)
+            {
+                var position = positionView[0];
+                return Evaluate(position.X, position.Y);
+            }
+
+            /// <summary>True if current is strictly smaller than proposed.</summary>
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+        }
+
+        /// <summary>
+        /// The two-dimensional Easom objective, bounded by [-100, 100] on both axes:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct EasomFunction :
+            IOptimizationFunction<Float32x2, float, float>,
+            IPredefineTestFunction
+        {
+            private static readonly float[] GlobalLowerBounds = { -100.0f, -100.0f };
+            private static readonly float[] GlobalUpperBounds = { 100.0f, 100.0f };
+
+            /// <summary>
+            /// The expected optimum of the function.
+            /// </summary>
+            public const float GlobalResult = -1.0f;
+
+            /// <summary>
+            /// Computes -cos(x) * cos(y) * exp(-((x - pi)^2 + (y - pi)^2)).
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float Evaluate(float x, float y)
+            {
+                float dx = x - XMath.PI;
+                float dy = y - XMath.PI;
+                return -XMath.Cos(x) * XMath.Cos(y) * XMath.Exp(-(dx * dx + dy * dy));
+            }
+
+            /// <summary>Returns <see cref="GlobalResult"/>.</summary>
+            public float Result => GlobalResult;
+
+            /// <summary>Returns the shared lower bounds array.</summary>
+            public float[] LowerBounds => GlobalLowerBounds;
+
+            /// <summary>Returns the shared upper bounds array.</summary>
+            public float[] UpperBounds => GlobalUpperBounds;
+
+            /// <summary>Evaluates the first vector of the position view.</summary>
+            public float Evaluate(
+                LongIndex1D index,
+                Index1D dimension,
+                SingleVectorView<Float32x2> positionView)
+            {
+                var position = positionView[0];
+                return Evaluate(position.X, position.Y);
+            }
+
+            /// <summary>True if current is strictly smaller than proposed.</summary>
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+        }
+
+        /// <summary>
+        /// The Schaffer function N4 objective, bounded by [-100, 100] on both axes:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct ShafferFunction4 :
+            IOptimizationFunction<Float32x2, float, float>,
+            IPredefineTestFunction
+        {
+            private static readonly float[] GlobalLowerBounds = { -100.0f, -100.0f };
+            private static readonly float[] GlobalUpperBounds = { 100.0f, 100.0f };
+
+            /// <summary>The expected optimum of the function.</summary>
+            public const float GlobalResult = 0.292579f;
+
+            /// <summary>
+            /// Computes 0.5 + (cos^2(sin(|x^2 - y^2|)) - 0.5) divided by
+            /// (1 + 0.001 * (x^2 + y^2))^2.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float Evaluate(float x, float y)
+            {
+                float cosine = XMath.Cos(XMath.Sin(XMath.Abs(x * x - y * y)));
+                float numerator = cosine * cosine - 0.5f;
+                float root = 1 + 0.001f * (x * x + y * y);
+                return 0.5f + numerator / (root * root);
+            }
+
+            /// <summary>Returns <see cref="GlobalResult"/>.</summary>
+            public float Result => GlobalResult;
+
+            /// <summary>Returns the shared lower bounds array.</summary>
+            public float[] LowerBounds => GlobalLowerBounds;
+
+            /// <summary>Returns the shared upper bounds array.</summary>
+            public float[] UpperBounds => GlobalUpperBounds;
+
+            /// <summary>Evaluates the first vector of the position view.</summary>
+            public float Evaluate(
+                LongIndex1D index,
+                Index1D dimension,
+                SingleVectorView<Float32x2> positionView)
+            {
+                var position = positionView[0];
+                return Evaluate(position.X, position.Y);
+            }
+
+            /// <summary>True if current is strictly smaller than proposed.</summary>
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+        }
+
+        /// <summary>
+        /// The Rosenbrock objective constrained to a disk, bounded by [-1.5, 1.5]:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct RosenbrockDisk :
+            IOptimizationFunction<Float32x2, float, float>,
+            IPredefineTestFunction
+        {
+            private static readonly float[] GlobalLowerBounds = { -1.5f, -1.5f };
+            private static readonly float[] GlobalUpperBounds = { 1.5f, 1.5f };
+
+            /// <summary>
+            /// The expected optimum of the function.
+            /// </summary>
+            public const float GlobalResult = 0.0f;
+
+            /// <summary>
+            /// Computes (1 - x)^2 + 100 * (y - x^2)^2 for points satisfying
+            /// x^2 + y^2 &lt;= 2 and yields float.MaxValue for all other points.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float Evaluate(float x, float y)
+            {
+                float oneMinusX = 1.0f - x;
+                float xSquared = x * x;
+                float offset = y - xSquared;
+                float value = oneMinusX * oneMinusX + 100.0f * offset * offset;
+                return xSquared + y * y <= 2.0f ? value : float.MaxValue;
+            }
+
+            /// <summary>Returns <see cref="GlobalResult"/>.</summary>
+            public float Result => GlobalResult;
+
+            /// <summary>Returns the shared lower bounds array.</summary>
+            public float[] LowerBounds => GlobalLowerBounds;
+
+            /// <summary>Returns the shared upper bounds array.</summary>
+            public float[] UpperBounds => GlobalUpperBounds;
+
+            /// <summary>Evaluates the first vector of the position view.</summary>
+            public float Evaluate(
+                LongIndex1D index,
+                Index1D dimension,
+                SingleVectorView<Float32x2> positionView)
+            {
+                var position = positionView[0];
+                return Evaluate(position.X, position.Y);
+            }
+
+            /// <summary>True if current is strictly smaller than proposed.</summary>
+            public bool CurrentIsBetter(float current, float proposed) =>
+                proposed > current;
+        }
+
+        /// <summary>
+        /// Represents the constrained Gomez and Levy function, bounded by [-1, 0.75] x [-1, 1]:
+        /// https://en.wikipedia.org/wiki/Test_functions_for_optimization
+        /// </summary>
+        public readonly record struct GomezAndLevyFunction :
+            IOptimizationFunction<Float32x2, float, float>,
+            IPredefineTestFunction
+        {
+            private static readonly float[] GlobalLowerBounds = { -1.0f, -1.0f };
+            private static readonly float[] GlobalUpperBounds = { 0.75f, 1.0f };
+
+            /// <summary>The optimal result.</summary>
+            public const float GlobalResult = -1.031628453f;
+
+            /// <summary>
+            /// Evaluates 4x^2 - 2.1x^4 + x^6/3 + xy - 4y^2 + 4y^4 for points satisfying
+            /// -sin(4*pi*x) + 2*sin^2(2*pi*y) &lt;= 1.5 and yields float.MaxValue otherwise.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float Evaluate(float x, float y)
+            {
+                float x2 = x * x;
+                float x4 = x2 * x2;
+                float y2 = y * y;
+                float y4 = y2 * y2;
+                // The x^4 term is negative; only then is GlobalResult the true optimum
+                float result = 4.0f * x2 - 2.1f * x4 + 1.0f / 3.0f * x4 * x2 +
+                    x * y - 4.0f * y2 + 4.0f * y4;
+                float sin = XMath.Sin(2.0f * XMath.PI * y);
+                float conditionValue = -XMath.Sin(4.0f * XMath.PI * x) + 2.0f * sin * sin;
+                return conditionValue <= 1.5f ? result : float.MaxValue;
+            }
+
+            /// <summary>Returns <see cref="GlobalResult"/>.</summary>
+            public float Result => GlobalResult;
+
+            /// <summary>Returns the shared lower bounds array.</summary>
+            public float[] LowerBounds => GlobalLowerBounds;
+
+            /// <summary>Returns the shared upper bounds array.</summary>
+            public float[] UpperBounds => GlobalUpperBounds;
+
+            /// <summary>Evaluates the first vector of the position view.</summary>
+            public float Evaluate(
+                LongIndex1D index,
+                Index1D dimension,
+                SingleVectorView<Float32x2> positionView)
+            {
+                var first = positionView[0];
+                return Evaluate(first.X, first.Y);
+            }
+
+            /// <summary>True if current is strictly smaller than proposed.</summary>
+            public bool CurrentIsBetter(float current, float proposed) =>
+                current < proposed;
+        }
+
         #endregion
 
         #region MemberData
@@ -129,7 +401,7 @@ public static TheoryData<
 
         #endregion
 
-        [SkippableTheory()]
+        [SkippableTheory]
         [MemberData(nameof(TestData))]
         public void ParticleSwarmOptimization<
             TFunc,
@@ -157,7 +429,7 @@ public void ParticleSwarmOptimization<
             Skip.If(
                 Accelerator.AcceleratorType == AcceleratorType.CPU &&
                 optimizerConfig.NumIterations * optimizerConfig.NumParticles > 2048);
-            
+
             const int Seed = 24404699;
             using var pso = new PSO<
                 TNumericType,
@@ -185,7 +457,7 @@ public void ParticleSwarmOptimization<
                 best,
                 optimizerConfig.NumIterations);
             stream.Synchronize();
-            
+
             // Check result
             Assert.True(
                 result.Result - delta <= expected,
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs
new file mode 100644
index 0000000000..80133ed814
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.AdjustSOGPlayers.cs
@@ -0,0 +1,162 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.AdjustSOGPlayers.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Random;
+using ILGPU.Util;
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// The intermediate state used while adjusting SOG players in parallel. It
+        /// declares no members of its own and hands the given random range provider
+        /// to its <c>InitializePlayersState</c> base class.
+        /// </summary>
+        /// <typeparam name="TRandom">The random range provider type.</typeparam>
+        private class AdjustSOGPlayersState<TRandom> : InitializePlayersState<T, TRandom>
+            where TRandom : struct, IRandomRangeProvider<T>
+        {
+            /// <summary>
+            /// Constructs a new state around the given random range provider.
+            /// </summary>
+            /// <param name="random">The random range provider to use.</param>
+            public AdjustSOGPlayersState(TRandom random)
+                : base(random) { }
+        }
+
+        /// <summary>
+        /// Adjusts the positions of all SOG players from the current iteration based on
+        /// the best known position and the SDG vector.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type being used.</typeparam>
+        /// <typeparam name="TType">The processing type.</typeparam>
+        /// <typeparam name="TRandom">The random range provider type.</typeparam>
+        private sealed class AdjustSOGPlayers<TProcessor, TType, TRandom> :
+            ParallelProcessingCache<
+                AdjustSOGPlayersState<TRandom>,
+                AdjustSOGPlayers<TProcessor, TType, TRandom>>,
+            IParallelProcessingBody<AdjustSOGPlayersState<TRandom>>
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<T>
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly Func<MetaOptimizer<T, TEvalType>, TRandom> getRandom;
+
+            /// <summary>
+            /// Creates a new SOG player adjustment instance.
+            /// </summary>
+            /// <param name="instance">The parent optimizer instance.</param>
+            /// <param name="createRandom">A function creating a new RNG instance.</param>
+            public AdjustSOGPlayers(
+                MetaOptimizer<T, TEvalType> instance,
+                Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom)
+            {
+                parent = instance;
+                getRandom = createRandom;
+            }
+
+            /// <summary>
+            /// Gets or sets the best known position vector.
+            /// </summary>
+            public ReadOnlyMemory<T> BestPosition { get; set; }
+
+            /// <summary>
+            /// Returns the current instance.
+            /// </summary>
+            protected override AdjustSOGPlayers<TProcessor, TType, TRandom>
+                CreateBody() => this;
+
+            /// <summary>
+            /// Creates an intermediate state using a new RNG obtained for the parent.
+            /// </summary>
+            protected override AdjustSOGPlayersState<TRandom>
+                CreateIntermediate() => new(getRandom(parent));
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Initialize() { }
+
+            /// <summary>
+            /// Adjusts all SOG-player positions from the current iteration while taking
+            /// SDG and best positions into account.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? _,
+                AdjustSOGPlayersState<TRandom> state)
+            {
+                // Load sog index and associated position vector
+                var offensiveIndex = parent.sogList[index];
+                var offensive = parent
+                    .GetNextPosition(offensiveIndex)
+                    .CastUnsafe<T, TType>();
+
+                // Get two fresh random numbers
+                var r1 = state.Next();
+                var r2 = state.Next();
+
+                // Get lower and upper bounds
+                var lowerBounds = parent.lowerBounds.AsSpan().CastUnsafe<T, TType>();
+                var upperBounds = parent.upperBounds.AsSpan().CastUnsafe<T, TType>();
+
+                // Get best position and SDG
+                var bestPosition = BestPosition.Span.CastUnsafe<T, TType>();
+                var sdg = parent.sdg.AsSpan().CastUnsafe<T, TType>();
+
+                // Create new processor for this step
+                var processor = TProcessor.New();
+                for (int i = 0; i < offensive.Length; ++i)
+                {
+                    // Get local offensive item ref
+                    ref var offensiveVec = ref offensive.GetItemRef(i);
+
+                    // Compute new position using the i-th best-position and SDG vectors
+                    var xOffNew3 = processor.DetermineNewPosition(
+                        offensiveVec,
+                        bestPosition.GetItemRef(i),
+                        sdg.GetItemRef(i),
+                        r1,
+                        r2,
+                        parent.OffensiveSOGStepSize);
+
+                    // Clamp new offensive position and store result
+                    var clamped = processor.Clamp(
+                        lowerBounds.GetItemRef(i),
+                        upperBounds.GetItemRef(i),
+                        xOffNew3);
+                    offensiveVec = clamped;
+                }
+            }
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Finalize(
+                ReadOnlySpan<AdjustSOGPlayersState<TRandom>> intermediateStates)
+            { }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs
new file mode 100644
index 0000000000..18675bb69a
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Evaluator.cs
@@ -0,0 +1,390 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.Evaluator.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// A parallel evaluation state storing temporary best result and position
+        /// information per thread.
+        /// </summary>
+        /// <typeparam name="TFunction">
+        /// The optimization function type to use.
+        /// </typeparam>
+        /// <typeparam name="TIntermediate">
+        /// The intermediate state type for each optimization processing thread.
+        /// </typeparam>
+        private sealed class EvaluatorState<TFunction, TIntermediate> : DisposeBase
+            where TFunction :
+                IBaseOptimizationFunction<TEvalType>,
+                IParallelCache<TIntermediate>
+            where TIntermediate : class
+        {
+            // NOTE(review): presumably left mutable so that member calls on
+            // struct-based functions do not operate on defensive copies - confirm.
+            private TFunction function;
+            private TEvalType bestKnownResult;
+            private readonly T[] bestPosition;
+
+            /// <summary>
+            /// Creates a new evaluation state.
+            /// </summary>
+            /// <param name="optimizationFunction">
+            /// The optimization function to use.
+            /// </param>
+            /// <param name="numPaddedDimensions">
+            /// The number of padded dimensions taking vector lengths into account.
+            /// </param>
+            public EvaluatorState(TFunction optimizationFunction, int numPaddedDimensions)
+            {
+                function = optimizationFunction;
+                bestPosition = new T[numPaddedDimensions];
+                Intermediate = function.CreateIntermediate();
+            }
+
+            /// <summary>
+            /// Returns the intermediate state of this instance.
+            /// </summary>
+            public TIntermediate Intermediate { get; }
+
+            /// <summary>
+            /// Resets the best known result to the given result value and clears the
+            /// internally stored best position.
+            /// </summary>
+            /// <param name="bestResult">The best result value to store.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Reset(TEvalType bestResult)
+            {
+                bestKnownResult = bestResult;
+                Array.Clear(bestPosition);
+            }
+
+            /// <summary>
+            /// Merges the given result with the internally stored one. If the passed
+            /// result value is considered better than the stored one, the passed position
+            /// vector will be copied to the internally stored best position.
+            /// </summary>
+            /// <param name="result">The result value to merge.</param>
+            /// <param name="position">
+            /// The position that led to the given result value.
+            /// </param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void MergeWith(TEvalType result, ReadOnlySpan<T> position)
+            {
+                // Keep the stored result if the function rates it as the better one
+                if (function.CurrentIsBetter(bestKnownResult, result))
+                    return;
+
+                bestKnownResult = result;
+                position.CopyTo(bestPosition);
+            }
+
+            /// <summary>
+            /// Aggregates currently available information into the given result field.
+            /// If the objective function determines that the referenced result is worse
+            /// than the one stored internally, the referenced result value is updated
+            /// and the internally stored position is copied to the given result position
+            /// span.
+            /// </summary>
+            /// <param name="result">
+            /// A reference to the currently known best result.
+            /// </param>
+            /// <param name="resultPosition">
+            /// A span pointing to the globally found best result position vector which
+            /// will be updated if the internally stored result value is considered
+            /// better than the referenced one.
+            /// </param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void AggregateInto(ref TEvalType result, Span<T> resultPosition)
+            {
+                // Leave the referenced result untouched if it is rated as the better one
+                if (function.CurrentIsBetter(result, bestKnownResult))
+                    return;
+
+                result = bestKnownResult;
+                bestPosition.CopyTo(resultPosition);
+            }
+
+            /// <summary>
+            /// Disposes the intermediate state if it implements
+            /// <see cref="IDisposable"/>.
+            /// </summary>
+            /// <param name="disposing">
+            /// True, if this method is invoked by an explicit dispose call. Other
+            /// managed objects are only touched in this case; with a value of false (by
+            /// convention the finalizer path) the intermediate state is left alone.
+            /// </param>
+            protected override void Dispose(bool disposing)
+            {
+                if (disposing && Intermediate is IDisposable disposable)
+                    disposable.Dispose();
+
+                base.Dispose(disposing);
+            }
+        }
+
+        /// <summary>
+        /// Represents a result manager storing best result values.
+        /// </summary>
+        private struct ResultManager
+        {
+            private readonly T[] bestPosition;
+            private TEvalType bestResult;
+
+            /// <summary>
+            /// Creates a new result manager.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer.</param>
+            /// <param name="bestUserKnownResult">
+            /// The best known result provided by the user.
+            /// </param>
+            /// <param name="bestKnownPosition">
+            /// The best known position provided by the user.
+            /// </param>
+            /// <exception cref="ArgumentOutOfRangeException">
+            /// Thrown if a best known position is given whose length differs from the
+            /// number of dimensions of the parent optimizer.
+            /// </exception>
+            public ResultManager(
+                MetaOptimizer<T, TEvalType> optimizer,
+                in TEvalType bestUserKnownResult,
+                ReadOnlyMemory<T>? bestKnownPosition)
+            {
+                // Query the dimension from the optimizer: the NumDimensions property of
+                // this struct is not assigned yet and would still hold its default of 0
+                int numDimensions = optimizer.NumDimensions;
+
+                // Validate our best known position vector
+                if (bestKnownPosition.HasValue &&
+                    bestKnownPosition.Value.Length != numDimensions)
+                {
+                    throw new ArgumentOutOfRangeException(nameof(bestKnownPosition));
+                }
+
+                bestPosition = new T[optimizer.NumPaddedDimensions];
+                bestResult = BestInitialResult = bestUserKnownResult;
+
+                NumDimensions = numDimensions;
+
+                // Copy the known position (if any) and remember where the part begins
+                // that has not been initialized by the user
+                int firstUnsetIndex = 0;
+                if (bestKnownPosition.HasValue)
+                {
+                    bestKnownPosition.Value.CopyTo(bestPosition);
+                    firstUnsetIndex = numDimensions;
+                }
+
+                // Reset all remaining (padding or unknown) elements
+                for (int i = firstUnsetIndex; i < bestPosition.Length; ++i)
+                    bestPosition[i] = T.Zero;
+            }
+
+            /// <summary>
+            /// Returns the number of dimensions.
+            /// </summary>
+            public int NumDimensions { get; }
+
+            /// <summary>
+            /// Returns the best found result.
+            /// </summary>
+            public readonly TEvalType BestResult => bestResult;
+
+            /// <summary>
+            /// Returns the best known initial result.
+            /// </summary>
+            public TEvalType BestInitialResult { get; }
+
+            /// <summary>
+            /// Returns the best found position (not padded).
+            /// </summary>
+            public readonly Memory<T> BestPosition =>
+                new(bestPosition, 0, NumDimensions);
+
+            /// <summary>
+            /// Returns the best found internal position (padded).
+            /// </summary>
+            public readonly ReadOnlyMemory<T> BestInternalPosition => bestPosition;
+
+            /// <summary>
+            /// Aggregates the best result of the given evaluator state into this
+            /// manager by forwarding the stored best result and position to
+            /// <see cref="EvaluatorState{TFunction,TIntermediate}.AggregateInto"/>.
+            /// </summary>
+            /// <typeparam name="TFunction">The objective function type.</typeparam>
+            /// <typeparam name="TIntermediate">
+            /// The intermediate state type of the evaluator state.
+            /// </typeparam>
+            /// <param name="state">The evaluator state to aggregate.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Aggregate<TFunction, TIntermediate>(
+                EvaluatorState<TFunction, TIntermediate> state)
+                where TFunction :
+                    IBaseOptimizationFunction<TEvalType>,
+                    IParallelCache<TIntermediate>
+                where TIntermediate : class =>
+                state.AggregateInto(ref bestResult, bestPosition);
+        }
+
+        /// <summary>
+        /// Represents an abstract evaluator that can be disposed and exposes the
+        /// result manager tracking the best results found during evaluation.
+        /// </summary>
+        private interface IEvaluator : IDisposable
+        {
+            /// <summary>
+            /// Returns the underlying result manager. Note that the result manager is
+            /// a value type, which means that each access yields a copy of it.
+            /// </summary>
+            ResultManager ResultManager { get; }
+
+            /// <summary>
+            /// Evaluates all players.
+            /// </summary>
+            /// <param name="options">The parallel processing options.</param>
+            void EvaluatePlayers(ParallelOptions options);
+        }
+
+        /// <summary>
+        /// An objective function evaluator which runs the user-defined function on
+        /// the position of every player in each step.
+        /// </summary>
+        /// <typeparam name="TFunction">The objective function type.</typeparam>
+        /// <typeparam name="TIntermediate">
+        /// The intermediate state type for each evaluator thread.
+        /// </typeparam>
+        /// <typeparam name="TModifier">The position modifier type.</typeparam>
+        private sealed class Evaluator<TFunction, TIntermediate, TModifier> :
+            ParallelProcessingCache<
+                EvaluatorState<TFunction, TIntermediate>,
+                Evaluator<TFunction, TIntermediate, TModifier>>,
+            IParallelProcessingBody<EvaluatorState<TFunction, TIntermediate>>,
+            IEvaluator
+            where TFunction : ICPUOptimizationFunction<T, TEvalType, TIntermediate>
+            where TIntermediate : class
+            where TModifier : ICPUPositionModifier<T>
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly int numPaddedDimensions;
+
+            private TFunction function;
+            private TModifier modifier;
+            private ResultManager resultManager;
+
+            /// <summary>
+            /// Constructs a new evaluator operating on the given optimizer.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer.</param>
+            /// <param name="optimizationFunction">The optimization function.</param>
+            /// <param name="positionModifier">The position modifier.</param>
+            /// <param name="bestUserKnownResult">
+            /// The best known result provided by the user.
+            /// </param>
+            /// <param name="bestKnownPosition">
+            /// The best known position provided by the user.
+            /// </param>
+            public Evaluator(
+                MetaOptimizer<T, TEvalType> optimizer,
+                in TFunction optimizationFunction,
+                in TModifier positionModifier,
+                in TEvalType bestUserKnownResult,
+                ReadOnlyMemory<T>? bestKnownPosition)
+            {
+                parent = optimizer;
+                numPaddedDimensions = optimizer.NumPaddedDimensions;
+
+                function = optimizationFunction;
+                modifier = positionModifier;
+
+                resultManager = new ResultManager(
+                    optimizer,
+                    bestUserKnownResult,
+                    bestKnownPosition);
+            }
+
+            /// <summary>
+            /// Returns the result manager.
+            /// </summary>
+            public ResultManager ResultManager
+            {
+                get { return resultManager; }
+            }
+
+            /// <summary>
+            /// Returns the current instance, as this evaluator acts as its own body.
+            /// </summary>
+            protected override Evaluator<
+                TFunction,
+                TIntermediate,
+                TModifier> CreateBody()
+            {
+                return this;
+            }
+
+            /// <summary>
+            /// Creates an intermediate temporary state.
+            /// </summary>
+            protected override EvaluatorState<TFunction, TIntermediate>
+                CreateIntermediate()
+            {
+                return new EvaluatorState<TFunction, TIntermediate>(
+                    function,
+                    numPaddedDimensions);
+            }
+
+            /// <summary>
+            /// Resets the given intermediate state to the best initial result of the
+            /// result manager and lets the function initialize its intermediate.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            protected override void InitializeIntermediate(
+                EvaluatorState<TFunction, TIntermediate> intermediateState)
+            {
+                var initialResult = resultManager.BestInitialResult;
+                intermediateState.Reset(initialResult);
+                function.InitializeIntermediate(intermediateState.Intermediate);
+            }
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Initialize() { }
+
+            /// <summary>
+            /// Adjusts and evaluates the position of the player with the given index
+            /// and merges the evaluation result into the given intermediate state.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? loopState,
+                EvaluatorState<TFunction, TIntermediate> intermediateState)
+            {
+                // Fetch the memory holding the position of the current player
+                var playerMemory = parent.GetPositionMemory(index);
+
+                // Give the modifier the chance to adjust the position first
+                modifier.AdjustPosition(
+                    index,
+                    playerMemory,
+                    resultManager.NumDimensions,
+                    numPaddedDimensions);
+
+                // Evaluate the (adjusted) position using the objective function
+                var playerPosition = playerMemory.Span;
+                var evaluation = function.Evaluate(
+                    playerPosition,
+                    intermediateState.Intermediate);
+
+                // Publish the evaluation and track the best result of this state
+                parent.evaluations[index] = evaluation;
+                intermediateState.MergeWith(evaluation, playerPosition);
+            }
+
+            /// <summary>
+            /// Finishes processing of all intermediate states and aggregates their
+            /// best results into the result manager of this evaluator.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Finalize(
+                ReadOnlySpan<EvaluatorState<TFunction, TIntermediate>> intermediateStates)
+            {
+                // Visit each state in order and fold it into the result manager
+                for (int i = 0; i < intermediateStates.Length; ++i)
+                {
+                    var current = intermediateStates[i];
+                    function.FinishProcessing(current.Intermediate);
+                    resultManager.Aggregate(current);
+                }
+            }
+
+            /// <summary>
+            /// Evaluates all players by running a parallel loop over all player
+            /// indices of the parent optimizer.
+            /// </summary>
+            /// <param name="options">The parallel processing options.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void EvaluatePlayers(ParallelOptions options)
+            {
+                ParallelFor(0, parent.NumPlayers, options);
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs
new file mode 100644
index 0000000000..f83463a158
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.InitializePlayers.cs
@@ -0,0 +1,149 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.InitializePlayers.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Random;
+using ILGPU.Util;
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// A parallel processing state used during player initialization. It wraps a
+        /// random provider from which the values for placing players are drawn.
+        /// </summary>
+        /// <typeparam name="TType">A processing type.</typeparam>
+        /// <typeparam name="TRandom">A random provider type.</typeparam>
+        private class InitializePlayersState<TType, TRandom>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<TType>
+        {
+            // Must stay mutable: Next() is invoked directly on this struct field
+            private TRandom randomProvider;
+
+            /// <summary>
+            /// Constructs a new initialization state.
+            /// </summary>
+            /// <param name="random">The random provider to use.</param>
+            public InitializePlayersState(TRandom random) => randomProvider = random;
+
+            /// <summary>
+            /// Draws the next value from the wrapped random provider.
+            /// </summary>
+            /// <returns>The drawn random number.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public TType Next()
+            {
+                return randomProvider.Next();
+            }
+        }
+
+        /// <summary>
+        /// Initializes the positions of all players using random values.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        /// <typeparam name="TRandom">The random provider type.</typeparam>
+        private sealed class InitializePlayers<TProcessor, TType, TRandom> :
+            ParallelProcessingCache<
+                InitializePlayersState<TType, TRandom>,
+                InitializePlayers<TProcessor, TType, TRandom>>,
+            IParallelProcessingBody<InitializePlayersState<TType, TRandom>>
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<TType>
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly Func<MetaOptimizer<T, TEvalType>, TRandom> getRandom;
+
+            /// <summary>
+            /// Constructs a new player initializer.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer.</param>
+            /// <param name="createRandom">A function creating a new RNG instance.</param>
+            public InitializePlayers(
+                MetaOptimizer<T, TEvalType> optimizer,
+                Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom)
+            {
+                getRandom = createRandom;
+                parent = optimizer;
+            }
+
+            /// <summary>
+            /// Returns the current instance, as this initializer acts as its own body.
+            /// </summary>
+            protected override InitializePlayers<TProcessor, TType, TRandom>
+                CreateBody()
+            {
+                return this;
+            }
+
+            /// <summary>
+            /// Creates an intermediate state wrapping a random provider obtained from
+            /// the creation callback for the parent optimizer.
+            /// </summary>
+            protected override InitializePlayersState<TType, TRandom>
+                CreateIntermediate()
+            {
+                var random = getRandom(parent);
+                return new InitializePlayersState<TType, TRandom>(random);
+            }
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Initialize() { }
+
+            /// <summary>
+            /// Initializes the position of the player with the given index by drawing
+            /// one random value per dimension slice and mapping it into the range
+            /// given by the lower and upper bounds of the parent optimizer.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? loopState,
+                InitializePlayersState<TType, TRandom> intermediateState)
+            {
+                // Reinterpret the player position and both bounds as processing types
+                var playerSlices = parent.GetPosition(index).CastUnsafe<T, TType>();
+                var lowerSlices = parent.lowerBounds.AsSpan().CastUnsafe<T, TType>();
+                var upperSlices = parent.upperBounds.AsSpan().CastUnsafe<T, TType>();
+
+                // Use a fresh processor for this player
+                var processor = TProcessor.New();
+
+                // Place the player slice by slice
+                for (int slice = 0; slice < parent.NumDimensionSlices; ++slice)
+                {
+                    // Draw the random value driving the placement of this slice
+                    var sample = intermediateState.Next();
+
+                    // Determine the position within the bounds and store it
+                    var placement = processor.GetRandomPosition(
+                        lowerSlices.GetItemRef(slice),
+                        upperSlices.GetItemRef(slice),
+                        sample);
+                    playerSlices.GetItemRef(slice) = placement;
+                }
+            }
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Finalize(
+                ReadOnlySpan<InitializePlayersState<TType, TRandom>>
+                    intermediateStates)
+            { }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs
new file mode 100644
index 0000000000..5bc677e417
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Instance.cs
@@ -0,0 +1,568 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.Instance.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Random;
+using ILGPU.Util;
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+#pragma warning disable CA1000 // No static members on generic types
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// Bundles the run-specific helper instances of a single optimization run,
+        /// which depend on the objective function and random instances.
+        /// </summary>
+        /// <typeparam name="TEvaluator">The internal evaluator type.</typeparam>
+        /// <typeparam name="TFunction">The objective function type.</typeparam>
+        /// <typeparam name="TIntermediate">
+        /// The type of all intermediate states during processing.
+        /// </typeparam>
+        /// <typeparam name="TProcessor">The processor type being used.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        /// <typeparam name="TRandom">The random range generator type.</typeparam>
+        sealed class RuntimeInstance<
+            TEvaluator,
+            TFunction,
+            TIntermediate,
+            TProcessor,
+            TType,
+            TRandom> : DisposeBase
+            where TEvaluator : class, IEvaluator
+            where TFunction : IBaseOptimizationFunction<TEvalType>
+            where TIntermediate : class
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<T>
+        {
+            private readonly MetaOptimizer<T, TEvalType> optimizer;
+            private readonly TEvaluator evaluator;
+            private readonly UpdatePlayers<
+                TFunction,
+                TProcessor,
+                TType,
+                TRandom> updatePlayers;
+
+            /// <summary>
+            /// Constructs a new runtime instance.
+            /// </summary>
+            /// <param name="parent">The parent optimizer.</param>
+            /// <param name="createRandom">
+            /// A specialized random provider generator.
+            /// </param>
+            /// <param name="function">The objective function.</param>
+            /// <param name="evaluatorInstance">The evaluator instance.</param>
+            public RuntimeInstance(
+                MetaOptimizer<T, TEvalType> parent,
+                Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom,
+                in TFunction function,
+                TEvaluator evaluatorInstance)
+            {
+                optimizer = parent;
+                evaluator = evaluatorInstance;
+
+                // The update logic operates on the padded best position tracked by
+                // the result manager of the evaluator
+                updatePlayers = new UpdatePlayers<TFunction, TProcessor, TType, TRandom>(
+                    parent,
+                    createRandom,
+                    function)
+                {
+                    BestPosition = evaluatorInstance.ResultManager.BestInternalPosition
+                };
+            }
+
+            /// <summary>
+            /// Returns the result manager of the underlying evaluator.
+            /// </summary>
+            public ResultManager ResultManager
+            {
+                get { return evaluator.ResultManager; }
+            }
+
+            /// <summary>
+            /// Evaluates all player positions using the underlying evaluator.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void EvaluatePlayers(ParallelOptions options)
+            {
+                evaluator.EvaluatePlayers(options);
+            }
+
+            /// <summary>
+            /// Updates all player positions in parallel and marks the SOG and SDG
+            /// information as available afterwards.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void UpdatePlayers(ParallelOptions options)
+            {
+                int numIterations = optimizer.M;
+                updatePlayers.ParallelFor(0, numIterations, options);
+
+                // From now on, current SOG and SDG information is available
+                updatePlayers.HasCurrentSOGAndSDG = true;
+            }
+
+            /// <summary>
+            /// Disposes the evaluator and the specialized update players instance in
+            /// the case of an explicit dispose call.
+            /// </summary>
+            protected override void Dispose(bool disposing)
+            {
+                if (disposing)
+                {
+                    // Release the evaluator first, followed by the update logic
+                    evaluator.Dispose();
+                    updatePlayers.Dispose();
+                }
+
+                base.Dispose(disposing);
+            }
+        }
+
+        /// <summary>
+        /// An instance implementing
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type being used.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        /// <typeparam name="TRandom">
+        /// The random range generator type for scalar types.
+        /// </typeparam>
+        /// <typeparam name="TTypeRandom">
+        /// The random range generator type for specialized processing types.
+        /// </typeparam>
+        sealed class Instance<
+            TProcessor,
+            TType,
+            TRandom,
+            TTypeRandom> : MetaOptimizer<T, TEvalType>
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<T>
+            where TTypeRandom : struct, IRandomRangeProvider<TType>
+        {
+            private readonly ParallelOptions parallelOptions;
+            private readonly OGAndDG<TProcessor, TType> ogAndDG;
+            private readonly AdjustSOGPlayers<
+                TProcessor,
+                TType,
+                TRandom> adjustSOGPlayers;
+            private readonly InitializePlayers<
+                TProcessor,
+                TType,
+                TTypeRandom> initializePlayers;
+
+            private readonly Func<MetaOptimizer<T, TEvalType>, TRandom> getRandom;
+
+            /// <summary>
+            /// Constructs a new meta optimizer instance.
+            /// </summary>
+            /// <param name="inputRandom">The input random number generator.</param>
+            /// <param name="numPlayers">The number of players to use.</param>
+            /// <param name="numDimensions">The dimensionality of the problem.</param>
+            /// <param name="maxNumParallelThreads">
+            /// The maximum number of parallel processing threads (if any).
+            /// </param>
+            /// <param name="createRandom">
+            /// A function callback to create random range generators for type T.
+            /// </param>
+            /// <param name="createTTypeRandom">
+            /// A function callback to create random range generators for type TType.
+            /// </param>
+            public Instance(
+                System.Random inputRandom,
+                int numPlayers,
+                int numDimensions,
+                int? maxNumParallelThreads,
+                Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom,
+                Func<MetaOptimizer<T, TEvalType>, TTypeRandom> createTTypeRandom)
+                : base(
+                    inputRandom,
+                    numPlayers,
+                    numDimensions,
+                    maxNumParallelThreads,
+                    TProcessor.Length)
+            {
+                // Remember the scalar random callback for later optimization runs
+                getRandom = createRandom;
+
+                // Set up all helper instances that are bound to this optimizer
+                ogAndDG = new OGAndDG<TProcessor, TType>(this);
+                adjustSOGPlayers = new AdjustSOGPlayers<TProcessor, TType, TRandom>(
+                    this,
+                    createRandom);
+                initializePlayers = new InitializePlayers<TProcessor, TType, TTypeRandom>(
+                    this,
+                    createTTypeRandom);
+
+                // Restrict the degree of parallelism to the number of workers
+                var options = new ParallelOptions();
+                options.MaxDegreeOfParallelism = MaxNumWorkers;
+                parallelOptions = options;
+            }
+
+            /// <summary>
+            /// Optimizes the given optimization function until the given break
+            /// function requests termination. The given best result and best known
+            /// position are passed to the evaluator created for this run.
+            /// </summary>
+            /// <typeparam name="TFunction">The optimization function type.</typeparam>
+            /// <typeparam name="TIntermediate">
+            /// The intermediate optimization state type.
+            /// </typeparam>
+            /// <typeparam name="TBreakFunction">The break function type.</typeparam>
+            /// <typeparam name="TModifier">The position modifier type.</typeparam>
+            /// <param name="optimizationFunction">
+            /// The optimization function to use.
+            /// </param>
+            /// <param name="breakFunction">The break function to use.</param>
+            /// <param name="positionModifier">
+            /// The position modifier to apply to all position updates during
+            /// optimization.
+            /// </param>
+            /// <param name="bestResult">The best known result.</param>
+            /// <param name="bestKnownPosition">The best known position.</param>
+            /// <returns>
+            /// A tuple consisting of the best result and position found.
+            /// </returns>
+            public override (TEvalType Result, Memory<T> Position) Optimize<
+                TFunction,
+                TIntermediate,
+                TBreakFunction,
+                TModifier>(
+                in TFunction optimizationFunction,
+                in TBreakFunction breakFunction,
+                in TModifier positionModifier,
+                TEvalType bestResult,
+                ReadOnlyMemory<T>? bestKnownPosition = default)
+            {
+                // Wrap the given optimization function in an evaluator
+                var functionEvaluator =
+                    new Evaluator<TFunction, TIntermediate, TModifier>(
+                        this,
+                        optimizationFunction,
+                        positionModifier,
+                        bestResult,
+                        bestKnownPosition);
+
+                // Track all temporary instances of this run in a runtime instance
+                // which is disposed as soon as the results have been loaded
+                using (var runtime = new RuntimeInstance<
+                    Evaluator<TFunction, TIntermediate, TModifier>,
+                    TFunction,
+                    TIntermediate,
+                    TProcessor,
+                    TType,
+                    TRandom>(this, getRandom, optimizationFunction, functionEvaluator))
+                {
+                    // Perform optimization
+                    OptimizeInternal(breakFunction, runtime);
+
+                    // Load best result information
+                    var results = runtime.ResultManager;
+                    return (results.BestResult, results.BestPosition);
+                }
+            }
+
+            /// <summary>
+            /// Optimizes the given raw optimization function using delegate-based
+            /// break and comparison functions.
+            /// </summary>
+            /// <param name="optimizationFunction">The raw function to optimize.</param>
+            /// <param name="breakFunction">The break function to use.</param>
+            /// <param name="evaluationComparison">The result comparison to use.</param>
+            /// <param name="bestResult">The best known result.</param>
+            /// <param name="bestKnownPosition">The best known position.</param>
+            /// <returns>The best result and position found.</returns>
+            public override (TEvalType Result, Memory<T> Position) OptimizeRaw(
+                RawCPUOptimizationFunction<T, TEvalType> optimizationFunction,
+                CPUOptimizationBreakFunction<TEvalType> breakFunction,
+                CPUEvaluationComparison<TEvalType> evaluationComparison,
+                TEvalType bestResult,
+                ReadOnlyMemory<T>? bestKnownPosition = default)
+            {
+                // Wrap the raw optimization function in an evaluator
+                var rawEvaluator = new RawEvaluator(
+                    this,
+                    optimizationFunction,
+                    evaluationComparison,
+                    bestResult,
+                    bestKnownPosition);
+                var comparisonWrapper = new RawComparisonWrapper(evaluationComparison);
+
+                // Track all temporary instances of this run in a runtime instance
+                using (var runtime = new RuntimeInstance<
+                    RawEvaluator, RawComparisonWrapper, object,
+                    TProcessor, TType, TRandom>(
+                    this, getRandom, comparisonWrapper, rawEvaluator))
+                {
+                    var breakWrapper = new BreakFunctionWrapper(breakFunction);
+                    OptimizeInternal(breakWrapper, runtime);
+
+                    var results = runtime.ResultManager;
+                    return (results.BestResult, results.BestPosition);
+                }
+            }
+
+            /// <summary>
+            /// Runs the internal optimizer loop, which uses the SGO algorithm to adjust
+            /// player/particle positions according to the objective function, until the
+            /// given break function requests termination.
+            /// </summary>
+            /// <typeparam name="TFunction">The optimization function type.</typeparam>
+            /// <typeparam name="TIntermediate">
+            /// The intermediate optimization state type.
+            /// </typeparam>
+            /// <typeparam name="TBreakFunction">The break function type.</typeparam>
+            /// <typeparam name="TEvaluator">The evaluator type.</typeparam>
+            /// <param name="breakFunction">The break function to use.</param>
+            /// <param name="runtimeInstance">
+            /// The current runtime instance holding all temporary instances.
+            /// </param>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            private void OptimizeInternal<
+                TFunction,
+                TIntermediate,
+                TBreakFunction,
+                TEvaluator>(
+                in TBreakFunction breakFunction,
+                RuntimeInstance<
+                    TEvaluator,
+                    TFunction,
+                    TIntermediate,
+                    TProcessor,
+                    TType,
+                    TRandom> runtimeInstance)
+                where TEvaluator : class, IEvaluator
+                where TFunction : IBaseOptimizationFunction<TEvalType>
+                where TIntermediate : class
+                where TBreakFunction : ICPUOptimizationBreakFunction<TEvalType>
+            {
+                // Share the internal best position with the SOG adjustment step
+                adjustSOGPlayers.BestPosition =
+                    runtimeInstance.ResultManager.BestInternalPosition;
+
+                // Initialize all players and evaluate them once up front
+                initializePlayers.ParallelFor(0, NumPlayers, parallelOptions);
+                runtimeInstance.EvaluatePlayers(parallelOptions);
+
+                // Iterate until the break function requests termination
+                int iteration = 0;
+                while (true)
+                {
+                    // Permute all indices and copy positions to new versions
+                    Permute();
+                    CopyPositions();
+
+                    // Initialize all SOG information and compute OG and DG
+                    InitSOGList();
+                    ogAndDG.ParallelFor(0, M, parallelOptions);
+
+                    // Update all players
+                    runtimeInstance.UpdatePlayers(parallelOptions);
+
+                    // SOG adjustments are skipped in the very first iteration
+                    if (iteration > 0)
+                        adjustSOGPlayers.ParallelFor(0, sogListCounter, parallelOptions);
+
+                    // Swap all buffers and evaluate all players again
+                    SwapBuffers();
+                    runtimeInstance.EvaluatePlayers(parallelOptions);
+
+                    // Check for user-defined break predicates
+                    var bestResult = runtimeInstance.ResultManager.BestResult;
+                    if (breakFunction.Break(bestResult, iteration))
+                    {
+                        return;
+                    }
+
+                    ++iteration;
+                }
+            }
+
+            #region IDisposable
+
+            /// <summary>
+            /// Disposes internal parallel cache instances if disposing is true.
+            /// </summary>
+            protected override void Dispose(bool disposing)
+            {
+                if (disposing)
+                {
+                    ogAndDG.Dispose();
+                    adjustSOGPlayers.Dispose();
+                    initializePlayers.Dispose();
+                }
+
+                base.Dispose(disposing);
+            }
+
+            #endregion
+        }
+
+        /// <summary>
+        /// Creates a new meta optimizer performing scalar (non-vectorized) operations.
+        /// </summary>
+        /// <typeparam name="TRandom">The random range provider type to use.</typeparam>
+        /// <param name="inputRandom">The input random number generator.</param>
+        /// <param name="numPlayers">
+        /// The number of players to use (must be at least two and an even number).
+        /// </param>
+        /// <param name="numDimensions">
+        /// The number of dimensions (must be greater than one).
+        /// </param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of parallel threads; uses as many as possible if omitted.
+        /// </param>
+        /// <returns>The created meta optimizer instance.</returns>
+        [CLSCompliant(false)]
+        public static MetaOptimizer<T, TEvalType> CreateScalar<TRandom>(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads = null)
+            where TRandom : struct, IRandomRangeProvider<TRandom, T>
+        {
+            // Factory for random range providers created with T.Zero and T.One
+            Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom = optimizer =>
+                TRandom.Create(optimizer.random, T.Zero, T.One);
+
+            // The processing type is T, so the same factory serves both purposes
+            return new Instance<ScalarProcessor, T, TRandom, TRandom>(
+                inputRandom,
+                numPlayers,
+                numDimensions,
+                maxNumParallelThreads,
+                createRandom,
+                createRandom);
+        }
+
+        /// <summary>
+        /// Creates a new meta optimizer performing vectorized operations based on
+        /// <see cref="Vector{T}"/> elements.
+        /// </summary>
+        /// <typeparam name="TRandom">The random range provider type to use.</typeparam>
+        /// <param name="inputRandom">The input random number generator.</param>
+        /// <param name="numPlayers">
+        /// The number of players to use (must be at least two and an even number).
+        /// </param>
+        /// <param name="numDimensions">
+        /// The number of dimensions (must be greater than one).
+        /// </param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of parallel threads; uses as many as possible if omitted.
+        /// </param>
+        /// <returns>The created meta optimizer instance.</returns>
+        [CLSCompliant(false)]
+        public static MetaOptimizer<T, TEvalType> CreateVectorized<TRandom>(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads = null)
+            where TRandom : struct, IRandomRangeProvider<TRandom, T>
+        {
+            // Factory for scalar random range providers created with T.Zero and T.One
+            Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom = optimizer =>
+                TRandom.Create(optimizer.random, T.Zero, T.One);
+
+            // Factory for vectorized providers derived from a new scalar provider
+            Func<MetaOptimizer<T, TEvalType>, RandomRangeVectorProvider<T, TRandom>>
+                createVectorizedRandom = optimizer =>
+                    createRandom(optimizer).CreateVectorProvider();
+
+            return new Instance<
+                VectorizedProcessor,
+                Vector<T>,
+                TRandom,
+                RandomRangeVectorProvider<T, TRandom>>(
+                inputRandom,
+                numPlayers,
+                numDimensions,
+                maxNumParallelThreads,
+                createRandom,
+                createVectorizedRandom);
+        }
+    }
+
+    /// <summary>
+    /// Static factory methods for <see cref="MetaOptimizer{T,TEvalType}"/> instances.
+    /// </summary>
+    public static class MetaOptimizer
+    {
+        #region Static
+
+        /// <summary>
+        /// Creates a new meta optimizer performing scalar (non-vectorized) operations.
+        /// </summary>
+        /// <typeparam name="T">The element type of all position vectors.</typeparam>
+        /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+        /// <typeparam name="TRandom">The random range provider type to use.</typeparam>
+        /// <param name="inputRandom">The input random number generator.</param>
+        /// <param name="numPlayers">
+        /// The number of players to use (must be at least two and an even number).
+        /// </param>
+        /// <param name="numDimensions">
+        /// The number of dimensions (must be greater than one).
+        /// </param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of parallel threads; uses as many as possible if omitted.
+        /// </param>
+        /// <returns>The created meta optimizer instance.</returns>
+        [CLSCompliant(false)]
+        public static MetaOptimizer<T, TEvalType> CreateScalar<T, TEvalType, TRandom>(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads = null)
+            where T : unmanaged, INumber<T>
+            where TEvalType : struct, IEquatable<TEvalType>
+            where TRandom : struct, IRandomRangeProvider<TRandom, T>
+        {
+            // Forward to the factory method of the generic optimizer class
+            return MetaOptimizer<T, TEvalType>.CreateScalar<TRandom>(
+                inputRandom,
+                numPlayers,
+                numDimensions,
+                maxNumParallelThreads);
+        }
+
+        /// <summary>
+        /// Creates a new meta optimizer performing vectorized operations.
+        /// </summary>
+        /// <typeparam name="T">The element type of all position vectors.</typeparam>
+        /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+        /// <typeparam name="TRandom">The random range provider type to use.</typeparam>
+        /// <param name="inputRandom">The input random number generator.</param>
+        /// <param name="numPlayers">
+        /// The number of players to use (must be at least two and an even number).
+        /// </param>
+        /// <param name="numDimensions">
+        /// The number of dimensions (must be greater than one).
+        /// </param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of parallel threads; uses as many as possible if omitted.
+        /// </param>
+        /// <returns>The created meta optimizer instance.</returns>
+        [CLSCompliant(false)]
+        public static MetaOptimizer<T, TEvalType> CreateVectorized<
+            T,
+            TEvalType,
+            TRandom>(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads = null)
+            where T : unmanaged, INumber<T>
+            where TEvalType : struct, IEquatable<TEvalType>
+            where TRandom : struct, IRandomRangeProvider<TRandom, T>
+        {
+            // Forward to the factory method of the generic optimizer class
+            return MetaOptimizer<T, TEvalType>.CreateVectorized<TRandom>(
+                inputRandom,
+                numPlayers,
+                numDimensions,
+                maxNumParallelThreads);
+        }
+
+        #endregion
+    }
+}
+
+#pragma warning restore CA1000
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs
new file mode 100644
index 0000000000..013a1b32ad
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.OGAndDG.cs
@@ -0,0 +1,192 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.OGAndDG.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// Represents an intermediate parallel processing state consisting of
+        /// temporary OG and DG buffers.
+        /// </summary>
+        private sealed class OGAndDGState
+        {
+            private readonly T[] nextOG;
+            private readonly T[] nextDG;
+
+            /// <summary>
+            /// Creates a new intermediate state holding two separate buffers.
+            /// </summary>
+            /// <param name="numDimensions">The number of elements per buffer.</param>
+            public OGAndDGState(int numDimensions) =>
+                (nextOG, nextDG) = (new T[numDimensions], new T[numDimensions]);
+
+            /// <summary>
+            /// Returns the next OG buffer as a span of the given processing type.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span<TType> GetNextOG<TType>()
+                where TType : struct =>
+                new Span<T>(nextOG).CastUnsafe<T, TType>();
+
+            /// <summary>
+            /// Returns the next DG buffer as a span of the given processing type.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span<TType> GetNextDG<TType>()
+                where TType : struct =>
+                new Span<T>(nextDG).CastUnsafe<T, TType>();
+        }
+
+        /// <summary>
+        /// Computes OG and DG information by accumulating the positions of all
+        /// offensive and defensive players and averaging the accumulated results.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        private sealed class OGAndDG<TProcessor, TType> :
+            ParallelProcessingCache<OGAndDGState, OGAndDG<TProcessor, TType>>,
+            IParallelProcessingBody<OGAndDGState>
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly T convertedM;
+
+            /// <summary>
+            /// Creates a new OG and DG computer.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer.</param>
+            public OGAndDG(MetaOptimizer<T, TEvalType> optimizer)
+            {
+                parent = optimizer;
+
+                // Convert M once as it is required by every averaging step
+                convertedM = T.CreateTruncating(optimizer.M);
+            }
+
+            /// <summary>
+            /// Returns the current instance.
+            /// </summary>
+            protected override OGAndDG<TProcessor, TType> CreateBody() => this;
+
+            /// <summary>
+            /// Creates an intermediate accumulation state holding separate OG and DG
+            /// buffers covering all padded dimensions.
+            /// </summary>
+            protected override OGAndDGState CreateIntermediate() =>
+                new OGAndDGState(parent.NumPaddedDimensions);
+
+            /// <summary>
+            /// Resets the OG and DG buffers of the given intermediate state.
+            /// </summary>
+            /// <param name="intermediateState">The state to reset.</param>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            protected override void InitializeIntermediate(
+                OGAndDGState intermediateState) =>
+                parent.Reset<TProcessor, TType>(
+                    intermediateState.GetNextOG<TType>(),
+                    intermediateState.GetNextDG<TType>());
+
+            /// <summary>
+            /// Resets parent OG and DG vectors for accumulation purposes.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Initialize()
+            {
+                // Reset the accumulation targets stored in the parent optimizer
+                parent.Reset<TProcessor, TType>(
+                    parent.og.AsSpan().CastUnsafe<T, TType>(),
+                    parent.dg.AsSpan().CastUnsafe<T, TType>());
+            }
+
+            /// <summary>
+            /// Accumulates offensive and defensive players into OG and DG vectors.
+            /// </summary>
+            /// <param name="index">The loop index of the player pair.</param>
+            /// <param name="loopState">The parallel loop state (not used).</param>
+            /// <param name="intermediateState">The state to accumulate into.</param>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? loopState,
+                OGAndDGState intermediateState)
+            {
+                // The offensive player is referenced at the given index, whereas
+                // its defensive counterpart is referenced M entries further
+                var playerIndices = parent.indices.AsSpan();
+                int offensivePlayer = playerIndices.GetItemRef(index);
+                int defensivePlayer = playerIndices.GetItemRef(index + parent.M);
+
+                // Get views of both player positions using the processing type
+                var offensivePosition = parent
+                    .GetPosition(offensivePlayer)
+                    .CastUnsafe<T, TType>();
+                var defensivePosition = parent
+                    .GetPosition(defensivePlayer)
+                    .CastUnsafe<T, TType>();
+
+                // Add both positions to the intermediate OG and DG buffers
+                parent.Accumulate<TProcessor, TType>(
+                    intermediateState.GetNextOG<TType>(),
+                    intermediateState.GetNextDG<TType>(),
+                    offensivePosition,
+                    defensivePosition);
+            }
+
+            /// <summary>
+            /// Accumulates all intermediate OG and DG states while averaging the result.
+            /// </summary>
+            /// <param name="intermediateStates">The states to accumulate.</param>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Finalize(ReadOnlySpan<OGAndDGState> intermediateStates)
+            {
+                // Get views of the accumulation targets stored in the parent
+                var targetOG = parent.og.AsSpan().CastUnsafe<T, TType>();
+                var targetDG = parent.dg.AsSpan().CastUnsafe<T, TType>();
+
+                // Add the buffers of every intermediate state to the parent vectors
+                for (int i = 0; i < intermediateStates.Length; ++i)
+                {
+                    var state = intermediateStates[i];
+                    parent.Accumulate<TProcessor, TType>(
+                        targetOG,
+                        targetDG,
+                        state.GetNextOG<TType>(),
+                        state.GetNextDG<TType>());
+                }
+
+                // Compute averages over all dimension slices
+                parent.ComputeAverage<TProcessor, TType>(
+                    targetOG,
+                    targetDG,
+                    convertedM);
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs
new file mode 100644
index 0000000000..57f08e2545
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.RawEvaluator.cs
@@ -0,0 +1,205 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.RawEvaluator.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// Wraps a comparison delegate used to compare evaluation results in order
+        /// to expose it via the optimization function and parallel cache interfaces.
+        /// </summary>
+        /// <param name="EvaluationComparison">The evaluation delegate.</param>
+        private readonly record struct RawComparisonWrapper(
+            CPUEvaluationComparison<TEvalType> EvaluationComparison) :
+            IBaseOptimizationFunction<TEvalType>,
+            IParallelCache<object>
+        {
+            /// <summary>
+            /// A shared object instance that serves as intermediate state.
+            /// </summary>
+            public static readonly object SharedIntermediateState = new object();
+
+            /// <summary>
+            /// Compares the current and the proposed evaluation instances by invoking
+            /// the underlying comparison delegate.
+            /// </summary>
+            public bool CurrentIsBetter(TEvalType current, TEvalType proposed) =>
+                EvaluationComparison.Invoke(current, proposed);
+
+            /// <summary>
+            /// Returns the shared intermediate state object.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public object CreateIntermediate() => SharedIntermediateState;
+
+            /// <summary>
+            /// Intentionally does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void InitializeIntermediate(object intermediateState) { }
+
+            /// <summary>
+            /// Intentionally does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void FinishProcessing(object intermediateState) { }
+        }
+
+        /// <summary>
+        /// Represents an objective function evaluator that invokes the user-defined
+        /// raw function on all player positions and reduces the results afterwards.
+        /// </summary>
+        private sealed class RawEvaluator :
+            ParallelProcessingCache<
+                EvaluatorState<RawComparisonWrapper, object>,
+                RawEvaluator>,
+            IParallelProcessingBody<EvaluatorState<RawComparisonWrapper, object>>,
+            IEvaluator
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly RawCPUOptimizationFunction<T, TEvalType> function;
+            private readonly CPUEvaluationComparison<TEvalType> comparison;
+
+            private readonly int numPaddedDimensions;
+            private ResultManager resultManager;
+
+            /// <summary>
+            /// Creates a new evaluator.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer.</param>
+            /// <param name="optimizationFunction">The optimization function.</param>
+            /// <param name="evaluationComparison">The eval comparison function.</param>
+            /// <param name="bestUserKnownResult">
+            /// The best known result provided by the user.
+            /// </param>
+            /// <param name="bestKnownPosition">
+            /// The best known position provided by the user.
+            /// </param>
+            public RawEvaluator(
+                MetaOptimizer<T, TEvalType> optimizer,
+                RawCPUOptimizationFunction<T, TEvalType> optimizationFunction,
+                CPUEvaluationComparison<TEvalType> evaluationComparison,
+                in TEvalType bestUserKnownResult,
+                ReadOnlyMemory<T>? bestKnownPosition)
+            {
+                (parent, function, comparison) =
+                    (optimizer, optimizationFunction, evaluationComparison);
+
+                // Cache the padded dimensions and set up the result manager
+                numPaddedDimensions = optimizer.NumPaddedDimensions;
+                resultManager = new(optimizer, bestUserKnownResult, bestKnownPosition);
+            }
+
+            /// <summary>
+            /// Returns the result manager.
+            /// </summary>
+            public ResultManager ResultManager => resultManager;
+
+            /// <summary>
+            /// Returns the current instance.
+            /// </summary>
+            protected override RawEvaluator CreateBody() => this;
+
+            /// <summary>
+            /// Creates an intermediate temporary state.
+            /// </summary>
+            protected override EvaluatorState<RawComparisonWrapper, object>
+                CreateIntermediate() =>
+                new EvaluatorState<RawComparisonWrapper, object>(
+                    new(comparison), numPaddedDimensions);
+
+            /// <summary>
+            /// Resets the given intermediate state by using the best known result
+            /// provided by the user.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            protected override void InitializeIntermediate(
+                EvaluatorState<RawComparisonWrapper, object> intermediateState) =>
+                intermediateState.Reset(resultManager.BestInitialResult);
+
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Initialize() { }
+
+            /// <summary>
+            /// Merges the evaluation result and the position of the player with the
+            /// given index into the given intermediate state.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? loopState,
+                EvaluatorState<RawComparisonWrapper, object> intermediateState)
+            {
+                // Load the position of the player and the evaluation result stored
+                // for it in the evaluations buffer of the parent optimizer
+                var playerPosition = parent.GetPosition(index);
+                var playerEvaluation = parent.evaluations[index];
+
+                // Merge both into the intermediate state
+                intermediateState.MergeWith(playerEvaluation, playerPosition);
+            }
+
+            /// <summary>
+            /// Aggregates the results of all given intermediate states into the result
+            /// manager of this evaluator.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Finalize(
+                ReadOnlySpan<
+                    EvaluatorState<RawComparisonWrapper, object>> intermediateStates)
+            {
+                // Fold every intermediate state into the result manager
+                for (int i = 0; i < intermediateStates.Length; ++i)
+                    resultManager.Aggregate(intermediateStates[i]);
+            }
+
+            /// <summary>
+            /// Invokes the raw evaluation function once to evaluate all players and
+            /// reduces all evaluation results in parallel afterwards using the given
+            /// parallel processing options.
+            /// </summary>
+            /// <param name="options">The parallel processing options.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void EvaluatePlayers(ParallelOptions options)
+            {
+                // Let the raw function evaluate all player positions at once
+                function.Invoke(
+                    parent.positions.AsMemory(),
+                    parent.evaluations.AsMemory(),
+                    ResultManager.NumDimensions,
+                    parent.NumPaddedDimensions,
+                    parent.NumPlayers,
+                    new(parent.NumPaddedDimensions),
+                    options);
+
+                // Reduce all evaluation results in parallel
+                ParallelFor(0, parent.NumPlayers, options);
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs
new file mode 100644
index 0000000000..ae14ac88a7
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Scalar.cs
@@ -0,0 +1,100 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.Scalar.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System.Runtime.CompilerServices;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// A processor working on a single element of type T per operation.
+        /// </summary>
+        private readonly struct ScalarProcessor : IProcessor<ScalarProcessor, T>
+        {
+            /// <summary>
+            /// Returns a default-initialized scalar processor.
+            /// </summary>
+            public static ScalarProcessor New()
+            {
+                return default;
+            }
+
+            /// <summary>
+            /// Returns the number of elements handled per operation, which is 1.
+            /// </summary>
+            public static int Length
+            {
+                get { return 1; }
+            }
+
+            /// <summary>
+            /// Clamps the given value to the given bounds using T.Clamp.
+            /// </summary>
+            /// <param name="lower">The lower bound.</param>
+            /// <param name="upper">The upper bound.</param>
+            /// <param name="value">The value to clamp.</param>
+            /// <returns>The clamped value.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T Clamp(T lower, T upper, T value)
+            {
+                // Note the argument order of T.Clamp: value first, then the bounds
+                return T.Clamp(value, lower, upper);
+            }
+
+            /// <summary>
+            /// Sets the given element to T.Zero.
+            /// </summary>
+            /// <param name="data">The element to reset.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Reset(out T data)
+            {
+                data = T.Zero;
+            }
+
+            /// <summary>
+            /// Adds the given source element to the target element in place.
+            /// </summary>
+            /// <param name="target">The element to add to.</param>
+            /// <param name="source">The element to add.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Accumulate(ref T target, T source)
+            {
+                target = target + source;
+            }
+
+            /// <summary>
+            /// Divides the given target element by the given count in place.
+            /// </summary>
+            /// <param name="target">The accumulated element to divide.</param>
+            /// <param name="count">The divisor.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void ComputeAverage(ref T target, T count)
+            {
+                target = target / count;
+            }
+
+            /// <summary>
+            /// Interpolates linearly between the lower and the upper bound.
+            /// </summary>
+            /// <param name="lower">The lower bound, weighted by 1 - randomNumber.</param>
+            /// <param name="upper">The upper bound, weighted by randomNumber.</param>
+            /// <param name="randomNumber">The interpolation weight.</param>
+            /// <returns>The interpolated position.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T GetRandomPosition(T lower, T upper, T randomNumber) =>
+                (T.One - randomNumber) * lower + randomNumber * upper;
+
+            /// <summary>
+            /// Computes an updated position by adding the offset
+            /// (r1 * firstC - r2 * secondC), scaled by the step size, to the position.
+            /// </summary>
+            /// <param name="position">The position to start from.</param>
+            /// <param name="firstC">The component weighted by r1.</param>
+            /// <param name="secondC">The component weighted by r2 and subtracted.</param>
+            /// <param name="r1">The weight of the first component.</param>
+            /// <param name="r2">The weight of the second component.</param>
+            /// <param name="stepSize">The scaling factor applied to the offset.</param>
+            /// <returns>The updated position.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T DetermineNewPosition(
+                T position,
+                T firstC,
+                T secondC,
+                T r1,
+                T r2,
+                T stepSize) =>
+                position + (r1 * firstC - r2 * secondC) * stepSize;
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs
new file mode 100644
index 0000000000..9f1d0a314d
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.UpdatePlayers.cs
@@ -0,0 +1,414 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.UpdatePlayers.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Algorithms.Random;
+using ILGPU.Util;
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// Intermediate parallel processing state used while updating players. It
+        /// owns one next-SOG and one next-SDG buffer plus one member counter for each
+        /// of them.
+        /// </summary>
+        /// <typeparam name="TRandom">The random provider type.</typeparam>
+        private sealed class UpdatePlayersState<TRandom> : AdjustSOGPlayersState<TRandom>
+            where TRandom : struct, IRandomRangeProvider<T>
+        {
+            private readonly T[] nextSOG;
+            private readonly T[] nextSDG;
+
+            private int nextSOGCounter;
+            private int nextSDGCounter;
+
+            /// <summary>
+            /// Creates a new intermediate state and allocates both buffers.
+            /// </summary>
+            /// <param name="provider">The random provider instance.</param>
+            /// <param name="numDimensions">
+            /// The number of elements of each of the two buffers.
+            /// </param>
+            public UpdatePlayersState(TRandom provider, int numDimensions)
+                : base(provider)
+            {
+                nextSOG = new T[numDimensions];
+                nextSDG = new T[numDimensions];
+            }
+
+            /// <summary>
+            /// Sets the SOG and SDG member counters back to zero.
+            /// </summary>
+            public void ResetCounters()
+            {
+                nextSOGCounter = nextSDGCounter = 0;
+            }
+
+            /// <summary>
+            /// Increments the SOG member counter by one.
+            /// </summary>
+            public void AddSOGMember()
+            {
+                nextSOGCounter += 1;
+            }
+
+            /// <summary>
+            /// Increments the SDG member counter by one.
+            /// </summary>
+            public void AddSDGMember()
+            {
+                nextSDGCounter += 1;
+            }
+
+            /// <summary>
+            /// Returns the next-SOG buffer as a span of the given processing type,
+            /// obtained via CastUnsafe.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span<TType> GetNextSOG<TType>() where TType : struct
+            {
+                Span<T> buffer = nextSOG.AsSpan();
+                return buffer.CastUnsafe<T, TType>();
+            }
+
+            /// <summary>
+            /// Returns the next-SDG buffer as a span of the given processing type,
+            /// obtained via CastUnsafe.
+            /// </summary>
+            /// <typeparam name="TType">The processing type.</typeparam>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Span<TType> GetNextSDG<TType>() where TType : struct
+            {
+                Span<T> buffer = nextSDG.AsSpan();
+                return buffer.CastUnsafe<T, TType>();
+            }
+
+            /// <summary>
+            /// Adds the member counters of this state to the given external counters.
+            /// </summary>
+            /// <param name="sogMembers">Receives the SOG member count.</param>
+            /// <param name="sdgMembers">Receives the SDG member count.</param>
+            public void AccumulateCounters(ref int sogMembers, ref int sdgMembers)
+            {
+                sogMembers = sogMembers + nextSOGCounter;
+                sdgMembers = sdgMembers + nextSDGCounter;
+            }
+        }
+
+        /// <summary>
+        /// Updates all players according to defensive and offensive winners.
+        /// </summary>
+        /// <typeparam name="TFunction">The objective function type to use.</typeparam>
+        /// <typeparam name="TProcessor">The processor type being used.</typeparam>
+        /// <typeparam name="TType">The processor element type.</typeparam>
+        /// <typeparam name="TRandom">The random provider type.</typeparam>
+        private sealed class UpdatePlayers<
+            TFunction,
+            TProcessor,
+            TType,
+            TRandom> :
+            ParallelProcessingCache<
+                UpdatePlayersState<TRandom>,
+                UpdatePlayers<
+                    TFunction,
+                    TProcessor,
+                    TType,
+                    TRandom>>,
+            IParallelProcessingBody<UpdatePlayersState<TRandom>>
+            where TFunction : IBaseOptimizationFunction<TEvalType>
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+            where TRandom : struct, IRandomRangeProvider<T>
+        {
+            private readonly MetaOptimizer<T, TEvalType> parent;
+            private readonly Func<MetaOptimizer<T, TEvalType>, TRandom> getRandom;
+            private readonly TFunction function;
+
+            private volatile bool hasSOGAndSDG;
+
+            /// <summary>
+            /// Creates a new player update instance and captures the number of
+            /// dimension slices of the given optimizer.
+            /// </summary>
+            /// <param name="optimizer">The parent optimizer instance.</param>
+            /// <param name="createRandom">A function creating a new RNG instance.</param>
+            /// <param name="optimizationFunction">The objective function.</param>
+            public UpdatePlayers(
+                MetaOptimizer<T, TEvalType> optimizer,
+                Func<MetaOptimizer<T, TEvalType>, TRandom> createRandom,
+                in TFunction optimizationFunction)
+            {
+                // Store all collaborators in one go
+                (parent, getRandom, function) =
+                    (optimizer, createRandom, optimizationFunction);
+
+                NumDimensionSlices = optimizer.NumDimensionSlices;
+            }
+
+            /// <summary>
+            /// Returns this instance, which acts as its own processing body.
+            /// </summary>
+            protected override UpdatePlayers<
+                TFunction,
+                TProcessor,
+                TType,
+                TRandom> CreateBody()
+            {
+                return this;
+            }
+
+            /// <summary>
+            /// Returns the number of dimension slices, i.e. the number of elements of
+            /// the processor element type that are processed per position vector. The
+            /// value is copied from the parent optimizer in the constructor.
+            /// </summary>
+            public int NumDimensionSlices { get; }
+
+            /// <summary>
+            /// Gets or sets whether SOG and SDG information is available. The value
+            /// is stored in a volatile field.
+            /// </summary>
+            public bool HasCurrentSOGAndSDG
+            {
+                get
+                {
+                    return hasSOGAndSDG;
+                }
+                set
+                {
+                    hasSOGAndSDG = value;
+                }
+            }
+
+            /// <summary>
+            /// Gets or sets the best known position vector. It is read during
+            /// processing when a defensive player wins and the new position of the
+            /// offensive player is computed.
+            /// </summary>
+            public ReadOnlyMemory<T> BestPosition { get; set; }
+
+            /// <summary>
+            /// Creates an intermediate temporary state using a random provider
+            /// obtained from the stored factory function and buffers sized by the
+            /// number of padded dimensions of the parent optimizer.
+            /// </summary>
+            protected override UpdatePlayersState<TRandom> CreateIntermediate()
+            {
+                var random = getRandom(parent);
+                return new UpdatePlayersState<TRandom>(
+                    random,
+                    parent.NumPaddedDimensions);
+            }
+
+            /// <summary>
+            /// Prepares the given intermediate state for a new run by passing its next
+            /// SOG and SDG vectors to the reset routine of the parent optimizer and
+            /// by clearing its member counters.
+            /// </summary>
+            /// <param name="intermediateState">The state to reset.</param>
+            [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+            protected override void InitializeIntermediate(
+                UpdatePlayersState<TRandom> intermediateState)
+            {
+                // Reset the accumulation vectors of this state
+                parent.Reset<TProcessor, TType>(
+                    intermediateState.GetNextSOG<TType>(),
+                    intermediateState.GetNextSDG<TType>());
+
+                // Start counting SOG and SDG members from zero again
+                intermediateState.ResetCounters();
+            }
+
+            /// <summary>
+            /// Passes the next SOG and SDG vectors of the parent optimizer to its
+            /// reset routine.
+            /// </summary>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Initialize()
+            {
+                // View both parent vectors using the processor element type
+                Span<TType> sogTarget = parent.nextSOG.AsSpan().CastUnsafe<T, TType>();
+                Span<TType> sdgTarget = parent.nextSDG.AsSpan().CastUnsafe<T, TType>();
+
+                parent.Reset<TProcessor, TType>(sogTarget, sdgTarget);
+            }
+
+            /// <summary>
+            /// Processes the pairing at the given index: compares the evaluations of
+            /// the offensive and the defensive player, writes a new clamped position
+            /// for the losing player and accumulates the current position of the
+            /// winning player into the next SOG or SDG vector of the given state.
+            /// </summary>
+            /// <param name="index">
+            /// The pairing index. The offensive player is looked up at this index and
+            /// the defensive player at this index plus M in the parent indices.
+            /// </param>
+            /// <param name="_">The parallel loop state (not used).</param>
+            /// <param name="state">
+            /// The intermediate state providing random numbers, the next SOG and SDG
+            /// accumulation vectors and the member counters.
+            /// </param>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Process(
+                int index,
+                ParallelLoopState? _,
+                UpdatePlayersState<TRandom> state)
+            {
+                // Get offsets and spans for offensive and defensive players
+                var indices = parent.indices.AsSpan();
+                int offensiveIndex = indices.GetItemRef(index);
+                int defensiveIndex = indices.GetItemRef(index + parent.M);
+
+                // Get the actual source views
+                var offensive = parent.GetPosition(offensiveIndex);
+                var defensive = parent.GetPosition(defensiveIndex);
+
+                // Look up the stored evaluations of both players and test whether the
+                // offensive or the defensive player wins this competition
+                var evaluatedOffensive = parent.evaluations[offensiveIndex];
+                var evaluatedDefensive = parent.evaluations[defensiveIndex];
+                bool offensiveWins = function.CurrentIsBetter(
+                    evaluatedOffensive,
+                    evaluatedDefensive);
+
+                // Get lower and upper bounds
+                var lowerBounds = parent.lowerBounds.AsSpan().CastUnsafe<T, TType>();
+                var upperBounds = parent.upperBounds.AsSpan().CastUnsafe<T, TType>();
+
+                // Get the current players
+                var currentOffensive = offensive.CastUnsafe<T, TType>();
+                var currentDefensive = defensive.CastUnsafe<T, TType>();
+
+                // Create new processor for this iteration
+                var processor = TProcessor.New();
+                if (offensiveWins)
+                {
+                    // Offensive player wins: the defensive player gets a new position
+                    // and the offensive player is accumulated into the next SOG
+
+                    // Get two random numbers
+                    var r1 = state.Next();
+                    var r2 = state.Next();
+
+                    // Get OG vector
+                    var og = parent.og.AsSpan().CastUnsafe<T, TType>();
+
+                    // Get a random offensive player
+                    int randomOffensiveIndex = parent.GetRandomOffensiveIndex(index);
+                    var randomOffensive = parent
+                        .GetPosition(randomOffensiveIndex)
+                        .CastUnsafe<T, TType>();
+
+                    // Fetch next vector references
+                    var nextSOG = state.GetNextSOG<TType>();
+                    var nextDefensive = parent
+                        .GetNextPosition(defensiveIndex)
+                        .CastUnsafe<T, TType>();
+                    for (int i = 0; i < NumDimensionSlices; ++i)
+                    {
+                        // Compute new position and set new vector of defensive player
+                        var xDefNew1 = processor.DetermineNewPosition(
+                            currentDefensive.GetItemRef(i),
+                            og.GetItemRef(i),
+                            randomOffensive.GetItemRef(i),
+                            r1,
+                            r2,
+                            parent.DefensiveStepSize);
+
+                        // Clamp new defensive position and store result
+                        var clamped = processor.Clamp(
+                            lowerBounds.GetItemRef(i),
+                            upperBounds.GetItemRef(i),
+                            xDefNew1);
+                        nextDefensive.GetItemRef(i) = clamped;
+
+                        // Accumulate SOG result
+                        processor.Accumulate(
+                            ref nextSOG.GetItemRef(i),
+                            currentOffensive.GetItemRef(i));
+                    }
+
+                    // Add new SOG member to state
+                    state.AddSOGMember();
+
+                    // Add offensive player to next sog
+                    // NOTE(review): Interlocked.Add returns the incremented value, so
+                    // the slot written here is the previous counter value plus one.
+                    // Confirm that this matches how sogList is sized and read and how
+                    // sogListCounter is reset.
+                    int sogIndex = Interlocked.Add(ref parent.sogListCounter, 1);
+                    parent.sogList[sogIndex] = offensiveIndex;
+                }
+                else
+                {
+                    // Defensive player wins: the offensive player gets a new position
+                    // and the defensive player is accumulated into the next SDG
+
+                    // Get four random numbers; r3 and r4 are always drawn but only
+                    // used by the SOG adjustment below
+                    var r1 = state.Next();
+                    var r2 = state.Next();
+                    var r3 = state.Next();
+                    var r4 = state.Next();
+
+                    // Get DG vector
+                    var dg = parent.dg.AsSpan().CastUnsafe<T, TType>();
+
+                    // Get random defensive player
+                    int randomDefensiveIndex = parent.GetRandomDefensiveIndex(index);
+                    var randomDefensive = parent
+                        .GetPosition(randomDefensiveIndex)
+                        .CastUnsafe<T, TType>();
+
+                    // Get SOG and best position data
+                    var sog = parent.sog.AsSpan().CastUnsafe<T, TType>();
+                    var bestPosition = BestPosition.Span.CastUnsafe<T, TType>();
+
+                    // Fetch next vector references
+                    var nextSDG = state.GetNextSDG<TType>();
+                    var nextOffensive = parent
+                        .GetNextPosition(offensiveIndex)
+                        .CastUnsafe<T, TType>();
+                    for (int i = 0; i < NumDimensionSlices; ++i)
+                    {
+                        // Compute new position and set new vector of offensive player
+                        var xOffNew1 = processor.DetermineNewPosition(
+                            currentOffensive.GetItemRef(i),
+                            dg.GetItemRef(i),
+                            randomDefensive.GetItemRef(i),
+                            r1,
+                            r2,
+                            parent.OffensiveStepSize);
+
+                        // Check whether we can apply SOG adjustments; without SOG and
+                        // SDG information the first position is used unchanged
+                        var xOffNew2 = xOffNew1;
+                        if (HasCurrentSOGAndSDG)
+                        {
+                            xOffNew2 = processor.DetermineNewPosition(
+                                xOffNew1,
+                                sog.GetItemRef(i),
+                                bestPosition.GetItemRef(i),
+                                r3,
+                                r4,
+                                parent.OffensiveSOGStepSize);
+                        }
+
+                        // Clamp new offensive position and store result
+                        var clamped = processor.Clamp(
+                            lowerBounds.GetItemRef(i),
+                            upperBounds.GetItemRef(i),
+                            xOffNew2);
+                        nextOffensive.GetItemRef(i) = clamped;
+
+                        // Accumulate SDG result
+                        processor.Accumulate(
+                            ref nextSDG.GetItemRef(i),
+                            currentDefensive.GetItemRef(i));
+                    }
+
+                    // Add new SDG member to state
+                    state.AddSDGMember();
+                }
+            }
+
+            /// <summary>
+            /// Merges the next SOG and SDG vectors of all intermediate states into
+            /// the next SOG and SDG vectors of the parent optimizer, sums up all
+            /// member counters and passes the merged vectors together with the total
+            /// counts to the averaging routine of the parent optimizer.
+            /// </summary>
+            /// <param name="intermediateStates">The states to merge.</param>
+            [MethodImpl(
+                MethodImplOptions.AggressiveInlining |
+                MethodImplOptions.AggressiveOptimization)]
+            public void Finalize(
+                ReadOnlySpan<UpdatePlayersState<TRandom>> intermediateStates)
+            {
+                // Target vectors stored in the parent optimizer
+                var targetSOG = parent.nextSOG.AsSpan().CastUnsafe<T, TType>();
+                var targetSDG = parent.nextSDG.AsSpan().CastUnsafe<T, TType>();
+
+                // Running totals of all SOG and SDG members
+                int totalSOGMembers = 0;
+                int totalSDGMembers = 0;
+
+                // Merge the vectors and counters of each state, one after another
+                for (int i = 0; i < intermediateStates.Length; ++i)
+                {
+                    var current = intermediateStates[i];
+                    var currentSOG = current.GetNextSOG<TType>();
+                    var currentSDG = current.GetNextSDG<TType>();
+
+                    parent.Accumulate<TProcessor, TType>(
+                        targetSOG,
+                        targetSDG,
+                        currentSOG,
+                        currentSDG);
+
+                    current.AccumulateCounters(
+                        ref totalSOGMembers,
+                        ref totalSDGMembers);
+                }
+
+                // Every pairing must have produced exactly one winner
+                Debug.Assert(totalSOGMembers + totalSDGMembers == parent.M);
+
+                // Average the merged vectors using the total member counts
+                parent.ComputeAverage<TProcessor, TType>(
+                    targetSOG,
+                    targetSDG,
+                    T.CreateSaturating(totalSOGMembers),
+                    T.CreateSaturating(totalSDGMembers));
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs
new file mode 100644
index 0000000000..ce45e153a0
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.Vectorized.cs
@@ -0,0 +1,117 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.Vectorized.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    partial class MetaOptimizer<T, TEvalType>
+    {
+        /// <summary>
+        /// A vectorized processor using SIMD operations on <see cref="Vector{T}"/>
+        /// values.
+        /// </summary>
+        private readonly struct VectorizedProcessor :
+            IProcessor<VectorizedProcessor, Vector<T>>
+        {
+            /// <summary>
+            /// Creates a new vectorized processor.
+            /// </summary>
+            public static VectorizedProcessor New() => default;
+
+            /// <summary>
+            /// Returns the number of elements of type T per vector.
+            /// </summary>
+            /// <remarks>
+            /// The element count of a vector depends on the size of its element type.
+            /// It has to be queried for T, since this processor operates on
+            /// <see cref="Vector{T}"/>; the count of an int vector only matches for
+            /// element types of the same size.
+            /// </remarks>
+            public static int Length => Vector<T>.Count;
+
+            /// <summary>
+            /// Clamps the given vector element-wise.
+            /// </summary>
+            /// <param name="lower">The lower bounds part.</param>
+            /// <param name="upper">The upper bounds part.</param>
+            /// <param name="value">The vector to clamp.</param>
+            /// <returns>The clamped vector.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector<T> Clamp(
+                Vector<T> lower,
+                Vector<T> upper,
+                Vector<T> value) =>
+                Vector.Min(Vector.Max(value, lower), upper);
+
+            /// <summary>
+            /// Sets all elements of the given vector to T.Zero.
+            /// </summary>
+            /// <param name="data">The vector to reset.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Reset(out Vector<T> data) =>
+                data = new Vector<T>(T.Zero);
+
+            /// <summary>
+            /// Adds the given source vector to the target vector in place.
+            /// </summary>
+            /// <param name="target">The vector to add to.</param>
+            /// <param name="source">The vector to add.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Accumulate(ref Vector<T> target, Vector<T> source)
+            {
+                var accumulated = source + target;
+                target = accumulated;
+            }
+
+            /// <summary>
+            /// Divides all elements of the target vector by the given count in place.
+            /// </summary>
+            /// <param name="target">The accumulated vector to divide.</param>
+            /// <param name="count">The divisor applied to every element.</param>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void ComputeAverage(ref Vector<T> target, T count)
+            {
+                // Broadcast the count to all elements to divide element-wise
+                var countValue = new Vector<T>(count);
+                var average = target / countValue;
+                target = average;
+            }
+
+            /// <summary>
+            /// Determines a newly sampled position using vectors.
+            /// </summary>
+            /// <param name="lower">The lower bounds, weighted by 1 - randomNumber.</param>
+            /// <param name="upper">The upper bounds, weighted by randomNumber.</param>
+            /// <param name="randomNumber">The interpolation weights.</param>
+            /// <returns>The interpolated position.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector<T> GetRandomPosition(
+                Vector<T> lower,
+                Vector<T> upper,
+                Vector<T> randomNumber)
+            {
+                // Interpolate between lower and upper bound
+                var lowerFactor = new Vector<T>(T.One) - randomNumber;
+                var lowerInfluence = lowerFactor * lower;
+                var upperInfluence = randomNumber * upper;
+                return lowerInfluence + upperInfluence;
+            }
+
+            /// <summary>
+            /// Computes an updated position by adding the offset
+            /// (r1 * firstC - r2 * secondC), scaled by the step size, to the position.
+            /// </summary>
+            /// <param name="position">The position to start from.</param>
+            /// <param name="firstC">The component weighted by r1.</param>
+            /// <param name="secondC">The component weighted by r2 and subtracted.</param>
+            /// <param name="r1">The scalar weight of the first component.</param>
+            /// <param name="r2">The scalar weight of the second component.</param>
+            /// <param name="stepSize">The scaling factor applied to the offset.</param>
+            /// <returns>The updated position.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public Vector<T> DetermineNewPosition(
+                Vector<T> position,
+                Vector<T> firstC,
+                Vector<T> secondC,
+                T r1,
+                T r2,
+                T stepSize)
+            {
+                // Determine new offset to use
+                var newOffset = r1 * firstC - r2 * secondC;
+
+                // Compute final position
+                var finalPos = position + newOffset * stepSize;
+                return finalPos;
+            }
+        }
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs
new file mode 100644
index 0000000000..fee36b5cba
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/MetaOptimizer.cs
@@ -0,0 +1,874 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: MetaOptimizer.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+#if NET7_0_OR_GREATER
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    /// <summary>
+    /// This meta optimizer is designed for CPUs and uses special .NET features for
+    /// improved performance. It implements an optimization-performance and runtime-
+    /// performance optimized version of the SGO algorithm:
+    /// Squid Game Optimizer (SGO): a novel metaheuristic algorithm
+    /// doi: 10.1038/s41598-023-32465-z.
+    /// </summary>
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    /// <remarks>
+    /// This version *does not* implement the vanilla SGO algorithm from the paper.
+    /// Instead, it uses modified update functions and specially tweaked position update
+    /// logic using multiple buffers and tuned SGO-winner lists. These modifications of
+    /// the original algorithm make this implementation significantly better in terms of
+    /// optimization quality and runtime performance. Moreover, this version is fully
+    /// parallelized and has the ability to use SIMD vector instructions to improve
+    /// runtime performance.
+    /// </remarks>
+    public abstract partial class MetaOptimizer<T, TEvalType> : DisposeBase
+        where T : unmanaged, INumber<T>
+        where TEvalType : struct, IEquatable<TEvalType>
+    {
+        #region Nested Types
+
+        /// <summary>
+        /// A scalar or vectorized processor implementing the actual SGO equations.
+        /// Implementations are value types that are created via <c>New</c> and that
+        /// operate on values of type <typeparamref name="TType"/>.
+        /// </summary>
+        /// <typeparam name="TSelf">The implementing processor type.</typeparam>
+        /// <typeparam name="TType">The operating element type.</typeparam>
+        private interface IProcessor<TSelf, TType>
+            where TSelf : struct, IProcessor<TSelf, TType>
+            where TType : unmanaged
+        {
+            /// <summary>
+            /// Creates a new processor instance.
+            /// </summary>
+            /// <returns>The created processor instance.</returns>
+            static abstract TSelf New();
+
+            /// <summary>
+            /// Returns the number of elements processed in a single step.
+            /// </summary>
+            static abstract int Length { get; }
+
+            /// <summary>
+            /// Resets the given data value.
+            /// </summary>
+            /// <param name="data">The value to be (re)initialized.</param>
+            void Reset(out TType data);
+
+            /// <summary>
+            /// Adds the given source value to the target value.
+            /// </summary>
+            /// <param name="target">The target value to accumulate into.</param>
+            /// <param name="source">The source value to add.</param>
+            void Accumulate(ref TType target, TType source);
+
+            /// <summary>
+            /// Clamps the given value.
+            /// </summary>
+            /// <param name="lower">The lower bounds part.</param>
+            /// <param name="upper">The upper bounds part.</param>
+            /// <param name="value">The value to clamp.</param>
+            /// <returns>The clamped value.</returns>
+            TType Clamp(TType lower, TType upper, TType value);
+
+            /// <summary>
+            /// Computes the average by taking the given count into account.
+            /// </summary>
+            /// <param name="target">
+            /// The accumulated value to read from; it is overwritten with the
+            /// resulting average.
+            /// </param>
+            /// <param name="count">The number of points to consider.</param>
+            void ComputeAverage(ref TType target, T count);
+
+            /// <summary>
+            /// Determines a newly sampled random position within the bounds of lower
+            /// and upper values.
+            /// </summary>
+            /// <param name="lower">The lower bounds of the position vector.</param>
+            /// <param name="upper">The upper bounds of the position vector.</param>
+            /// <param name="randomNumber">The random number to use.</param>
+            /// <returns>The newly sampled position.</returns>
+            TType GetRandomPosition(
+                TType lower,
+                TType upper,
+                TType randomNumber);
+
+            /// <summary>
+            /// Determines a new position based on a source position and two centroid
+            /// positions.
+            /// </summary>
+            /// <param name="position">The source position.</param>
+            /// <param name="firstC">The first centroid position.</param>
+            /// <param name="secondC">The second centroid position.</param>
+            /// <param name="r1">
+            /// The factor describing the influence of <paramref name="firstC"/>.
+            /// </param>
+            /// <param name="r2">
+            /// The factor describing the influence of <paramref name="secondC"/>.
+            /// </param>
+            /// <param name="stepSize">
+            /// The step size to use for offset computations.
+            /// </param>
+            /// <returns>The newly determined position.</returns>
+            TType DetermineNewPosition(
+                TType position,
+                TType firstC,
+                TType secondC,
+                T r1,
+                T r2,
+                T stepSize);
+        }
+
+        /// <summary>
+        /// Adapts plain delegates to the CPUOptimization interfaces such that
+        /// delegate-based callers can use the struct-based optimization entry points.
+        /// </summary>
+        /// <param name="EvalFunction">The evaluation function to be used.</param>
+        /// <param name="BreakFunction">
+        /// The break function to determine whether to break the solver iteration or not.
+        /// </param>
+        /// <param name="EvaluationComparison">
+        /// The function determining whether the first or the second evaluation value
+        /// given is considered better for the optimization problem.
+        /// </param>
+        private readonly record struct FunctionWrapper(
+            CPUOptimizationFunction<T, TEvalType> EvalFunction,
+            CPUOptimizationBreakFunction<TEvalType> BreakFunction,
+            CPUEvaluationComparison<TEvalType> EvaluationComparison) :
+            ICPUOptimizationFunction<T, TEvalType>,
+            ICPUOptimizationBreakFunction<TEvalType>
+        {
+            /// <summary>
+            /// Forwards the given position to the wrapped evaluation delegate.
+            /// </summary>
+            /// <param name="position">The position to evaluate.</param>
+            /// <returns>The result of the evaluation delegate.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public TEvalType Evaluate(ReadOnlySpan<T> position)
+            {
+                return EvalFunction(position);
+            }
+
+            /// <summary>
+            /// Forwards the given arguments to the wrapped break delegate.
+            /// </summary>
+            /// <param name="evalType">The evaluation value to test.</param>
+            /// <param name="iteration">The iteration to test.</param>
+            /// <returns>The result of the break delegate.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool Break(TEvalType evalType, int iteration)
+            {
+                return BreakFunction(evalType, iteration);
+            }
+
+            /// <summary>
+            /// Forwards the given arguments to the wrapped comparison delegate.
+            /// </summary>
+            /// <param name="current">The current evaluation value.</param>
+            /// <param name="proposed">The proposed evaluation value.</param>
+            /// <returns>The result of the comparison delegate.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool CurrentIsBetter(TEvalType current, TEvalType proposed)
+            {
+                return EvaluationComparison(current, proposed);
+            }
+        }
+
+        /// <summary>
+        /// Adapts a plain break delegate to the CPUOptimization break-function
+        /// interface used to test whether to break an optimization loop or not.
+        /// </summary>
+        /// <param name="BreakFunction">
+        /// The break function to determine whether to break the solver iteration or not.
+        /// </param>
+        private readonly record struct BreakFunctionWrapper(
+            CPUOptimizationBreakFunction<TEvalType> BreakFunction) :
+            ICPUOptimizationBreakFunction<TEvalType>
+        {
+            /// <summary>
+            /// Forwards the given arguments to the wrapped break delegate.
+            /// </summary>
+            /// <param name="evalType">The evaluation value to test.</param>
+            /// <param name="iteration">The iteration to test.</param>
+            /// <returns>The result of the break delegate.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool Break(TEvalType evalType, int iteration)
+            {
+                return BreakFunction(evalType, iteration);
+            }
+        }
+
+        /// <summary>
+        /// Adapts an optimization function that does not use intermediate states to
+        /// the intermediate-state-based optimization function interface.
+        /// </summary>
+        /// <typeparam name="TFunction">The stateless function to wrap.</typeparam>
+        private struct CachedOptimizationFunction<TFunction> :
+            ICPUOptimizationFunction<T, TEvalType, object>
+            where TFunction : ICPUOptimizationFunction<T, TEvalType>
+        {
+            private TFunction function;
+
+            /// <summary>
+            /// Wraps the given optimization function.
+            /// </summary>
+            /// <param name="optimizationFunction">The function to wrap.</param>
+            public CachedOptimizationFunction(TFunction optimizationFunction) =>
+                function = optimizationFunction;
+
+            /// <summary>
+            /// Returns the shared intermediate state object instead of creating a
+            /// new one.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public object CreateIntermediate()
+            {
+                return RawComparisonWrapper.SharedIntermediateState;
+            }
+
+            /// <summary>
+            /// Intentionally empty since there is nothing to initialize.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void InitializeIntermediate(object intermediateState) { }
+
+            /// <summary>
+            /// Intentionally empty since there is nothing to finish.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void FinishProcessing(object intermediateState) { }
+
+            /// <summary>
+            /// Forwards the comparison of the current and the proposed evaluation
+            /// instances to the wrapped function.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool CurrentIsBetter(TEvalType current, TEvalType proposed)
+            {
+                return function.CurrentIsBetter(current, proposed);
+            }
+
+            /// <summary>
+            /// Evaluates the given position using the wrapped function; the given
+            /// intermediate state is ignored.
+            /// </summary>
+            /// <returns>The evaluation result.</returns>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public TEvalType Evaluate(
+                ReadOnlySpan<T> position,
+                object intermediateState)
+            {
+                return function.Evaluate(position);
+            }
+        }
+
+        #endregion
+
+        #region Instance
+
+        // Private random instance seeded from the input random instance passed to the
+        // constructor
+        private readonly System.Random random;
+        // One entry per player; initialized to the identity mapping (indices[i] == i)
+        private readonly int[] indices;
+
+        // M entries each; initialized to the player indices [0, M) and [M, 2M)
+        private readonly int[] randomOffensiveIndices;
+        private readonly int[] randomDefensiveIndices;
+
+        // Bounds buffers holding NumPaddedDimensions entries each; only the first
+        // NumDimensions entries are exposed via LowerBounds and UpperBounds
+        private readonly T[] lowerBounds;
+        private readonly T[] upperBounds;
+
+        // Per-dimension buffers holding NumPaddedDimensions entries each
+        // NOTE(review): presumably the offensive/defensive group vectors of the SGO
+        // algorithm - confirm against the code using them
+        private readonly T[] og;
+        private readonly T[] dg;
+
+        // Per-dimension buffers holding NumPaddedDimensions entries each
+        // NOTE(review): presumably the current successful offensive/defensive group
+        // vectors - confirm against the code using them
+        private T[] sog;
+        private T[] sdg;
+
+        // Same size as sog and sdg
+        // NOTE(review): presumably the versions being built for the next iteration
+        // that get swapped with sog and sdg - confirm against the code using them
+        private T[] nextSOG;
+        private T[] nextSDG;
+
+        // One slot per player
+        // NOTE(review): sogListCounter presumably tracks the number of used slots in
+        // sogList - confirm against the code using them
+        private readonly int[] sogList;
+        private int sogListCounter;
+
+        // Player-major position storage: the values of player i start at index
+        // i * NumPaddedDimensions and span NumPaddedDimensions entries
+        private T[] positions;
+        private T[] nextPositions;
+
+        // One evaluation result slot per player
+        private readonly TEvalType[] evaluations;
+
+        /// <summary>
+        /// Creates a new meta optimizer instance.
+        /// </summary>
+        /// <param name="inputRandom">
+        /// The input random instance used to seed the internal random instance.
+        /// </param>
+        /// <param name="numPlayers">
+        /// The number of players. The value is raised to at least 4 and rounded up to
+        /// the next even number.
+        /// </param>
+        /// <param name="numDimensions">The dimensionality of the problem.</param>
+        /// <param name="maxNumParallelThreads">
+        /// The maximum number of processing threads (if any). Values smaller than one
+        /// are mapped to the number of processors.
+        /// </param>
+        /// <param name="numDimensionsPerStep">
+        /// The number of dimension values per batched step.
+        /// </param>
+        /// <exception cref="ArgumentNullException">
+        /// Thrown if <paramref name="inputRandom"/> is null.
+        /// </exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown if <paramref name="numPlayers"/>, <paramref name="numDimensions"/>
+        /// or <paramref name="numDimensionsPerStep"/> is smaller than one.
+        /// </exception>
+        [SuppressMessage(
+            "Design",
+            "CA1031:Do not catch general exception types",
+            Justification = "Catch is used to initialize step sizes to logical 0.5 " +
+                "which may lead to exceptions depending on the value type")]
+        protected MetaOptimizer(
+            System.Random inputRandom,
+            int numPlayers,
+            int numDimensions,
+            int? maxNumParallelThreads,
+            int numDimensionsPerStep)
+        {
+            if (inputRandom is null)
+                throw new ArgumentNullException(nameof(inputRandom));
+            if (numPlayers < 1)
+                throw new ArgumentOutOfRangeException(nameof(numPlayers));
+            if (numDimensions < 1)
+                throw new ArgumentOutOfRangeException(nameof(numDimensions));
+            if (numDimensionsPerStep < 1)
+                throw new ArgumentOutOfRangeException(nameof(numDimensionsPerStep));
+
+            // Ensure at least four players and an even number of players
+            numPlayers = Math.Max(numPlayers, 4);
+            numPlayers += numPlayers % 2;
+
+            NumPlayers = numPlayers;
+            MaxNumWorkers = maxNumParallelThreads.HasValue
+                ? maxNumParallelThreads.Value < 1
+                    ? Environment.ProcessorCount
+                    : maxNumParallelThreads.Value
+                : -1;
+
+            // Pad the number of dimensions to the next multiple of the step size.
+            // Adding the remainder of the division (instead of rounding up) does not
+            // yield a multiple in general: 5 dimensions with a step size of 4 would
+            // result in 6 padded dimensions and a single slice covering 4 values only
+            NumDimensions = numDimensions;
+            NumDimensionSlices = (numDimensions - 1) / numDimensionsPerStep + 1;
+            NumPaddedDimensions = NumDimensionSlices * numDimensionsPerStep;
+
+            random = new System.Random(inputRandom.Next());
+
+            lowerBounds = new T[NumPaddedDimensions];
+            upperBounds = new T[NumPaddedDimensions];
+
+            og = new T[NumPaddedDimensions];
+            dg = new T[NumPaddedDimensions];
+
+            sog = new T[NumPaddedDimensions];
+            sdg = new T[NumPaddedDimensions];
+
+            nextSOG = new T[NumPaddedDimensions];
+            nextSDG = new T[NumPaddedDimensions];
+
+            M = numPlayers / 2;
+            randomOffensiveIndices = new int[M];
+            randomDefensiveIndices = new int[M];
+
+            indices = new int[numPlayers];
+            sogList = new int[numPlayers];
+            positions = new T[numPlayers * NumPaddedDimensions];
+            nextPositions = new T[numPlayers * NumPaddedDimensions];
+            evaluations = new TEvalType[numPlayers];
+
+            // Initialize all index mappings: the first half of all players is
+            // referenced by the offensive indices, the second half by the defensive
+            // indices
+            for (int i = 0; i < numPlayers; ++i)
+            {
+                indices[i] = i;
+                if (i < M)
+                {
+                    randomOffensiveIndices[i] = i;
+                    randomDefensiveIndices[i] = i + M;
+                }
+            }
+
+            // Try to initialize the basic step sizes
+            try
+            {
+                var value2 = T.CreateSaturating(2);
+                DefensiveStepSize = T.One / value2;
+                OffensiveStepSize = T.One / value2;
+                OffensiveSOGStepSize = T.One / value2;
+            }
+            catch (Exception)
+            {
+                // We actually ignore the initialization of step sizes in this case
+            }
+        }
+
+        #endregion
+
+        #region Properties
+
+        /// <summary>
+        /// Returns the number of dimensions as passed to the constructor.
+        /// </summary>
+        public int NumDimensions { get; }
+
+        /// <summary>
+        /// Returns the number of padded dimensions. This is the length of each
+        /// internal per-dimension buffer and of each player's position range.
+        /// </summary>
+        public int NumPaddedDimensions { get; }
+
+        /// <summary>
+        /// Returns the number of players. The constructor raises the requested number
+        /// to at least 4 and rounds it up to an even number.
+        /// </summary>
+        public int NumPlayers { get; }
+
+        /// <summary>
+        /// Returns the number of dimension slices, which is the number of padded
+        /// dimensions divided by the number of dimension values per processing step.
+        /// </summary>
+        private int NumDimensionSlices { get; }
+
+        /// <summary>
+        /// Returns the maximum number of parallel processing threads. The value is -1
+        /// if no maximum has been specified, and the number of processors if the
+        /// specified maximum was smaller than one.
+        /// </summary>
+        private int MaxNumWorkers { get; }
+
+        /// <summary>
+        /// Returns half the number of players (referred to as M in the scope of the SGO
+        /// algorithm paper).
+        /// </summary>
+        protected int M { get; }
+
+        /// <summary>
+        /// Gets or sets the lower bounds of this optimizer. The span covers exactly
+        /// <see cref="NumDimensions"/> values.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown by the setter if the length of the given span is not equal to
+        /// <see cref="NumDimensions"/>.
+        /// </exception>
+        public ReadOnlySpan<T> LowerBounds
+        {
+            get => lowerBounds.AsSpan(0, NumDimensions);
+            set
+            {
+                if (value.Length == NumDimensions)
+                {
+                    value.CopyTo(lowerBounds.AsSpan());
+                    return;
+                }
+                throw new ArgumentOutOfRangeException(nameof(value));
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the upper bounds of this optimizer. The span covers exactly
+        /// <see cref="NumDimensions"/> values.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown by the setter if the length of the given span is not equal to
+        /// <see cref="NumDimensions"/>.
+        /// </exception>
+        public ReadOnlySpan<T> UpperBounds
+        {
+            get => upperBounds.AsSpan(0, NumDimensions);
+            set
+            {
+                if (value.Length == NumDimensions)
+                {
+                    value.CopyTo(upperBounds.AsSpan());
+                    return;
+                }
+                throw new ArgumentOutOfRangeException(nameof(value));
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the step size of the defensive players. The constructor tries
+        /// to initialize this value to one divided by two in terms of the element type.
+        /// </summary>
+        public T DefensiveStepSize { get; set; }
+
+        /// <summary>
+        /// Gets or sets the step size of the offensive players. The constructor tries
+        /// to initialize this value to one divided by two in terms of the element type.
+        /// </summary>
+        public T OffensiveStepSize { get; set; }
+
+        /// <summary>
+        /// Gets or sets the step size of the offensive players in the SOG. The
+        /// constructor tries to initialize this value to one divided by two in terms of
+        /// the element type.
+        /// </summary>
+        public T OffensiveSOGStepSize { get; set; }
+
+        #endregion
+
+        #region Methods
+
+        /// <summary>
+        /// Gets the memory range of the current position buffer that belongs to the
+        /// given player.
+        /// </summary>
+        /// <param name="playerIndex">The player index.</param>
+        /// <returns>
+        /// A memory instance holding all multidimensional position information for the
+        /// given player.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private Memory<T> GetPositionMemory(int playerIndex)
+        {
+            // Each player owns a consecutive range of NumPaddedDimensions values
+            int stride = NumPaddedDimensions;
+            return new Memory<T>(positions, playerIndex * stride, stride);
+        }
+
+        /// <summary>
+        /// Gets the current player position span to operate on source values in the
+        /// current iteration.
+        /// </summary>
+        /// <param name="playerIndex">The player index.</param>
+        /// <returns>
+        /// A span holding all multidimensional position information for the given player.
+        /// </returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown if the range of the given player is not part of the position buffer.
+        /// </exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private Span<T> GetPosition(int playerIndex) =>
+            // Slice the managed array directly instead of building a span from a raw
+            // pointer to an unpinned array element, which is not tracked by the GC
+            positions.AsSpan(
+                playerIndex * NumPaddedDimensions,
+                NumPaddedDimensions);
+
+        /// <summary>
+        /// Gets the next position span for value updates in the next iteration.
+        /// </summary>
+        /// <param name="playerIndex">The player index.</param>
+        /// <returns>
+        /// A span holding all multidimensional position information for the given player.
+        /// </returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown if the range of the given player is not part of the position buffer.
+        /// </exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private Span<T> GetNextPosition(int playerIndex) =>
+            // Slice the managed array directly instead of building a span from a raw
+            // pointer to an unpinned array element, which is not tracked by the GC
+            nextPositions.AsSpan(
+                playerIndex * NumPaddedDimensions,
+                NumPaddedDimensions);
+
+        /// <summary>
+        /// Looks up the entry of the random offensive index table that belongs to the
+        /// given relative player index.
+        /// </summary>
+        /// <param name="playerIndex">The relative input player index.</param>
+        /// <returns>An absolute random offensive index.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private int GetRandomOffensiveIndex(int playerIndex)
+        {
+            var offensiveIndices = randomOffensiveIndices.AsSpan();
+            return offensiveIndices.GetItemRef(playerIndex);
+        }
+
+        /// <summary>
+        /// Looks up the entry of the random defensive index table that belongs to the
+        /// given relative player index.
+        /// </summary>
+        /// <param name="playerIndex">The relative input player index.</param>
+        /// <returns>An absolute random defensive index.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private int GetRandomDefensiveIndex(int playerIndex)
+        {
+            var defensiveIndices = randomDefensiveIndices.AsSpan();
+            return defensiveIndices.GetItemRef(playerIndex);
+        }
+
+        /// <summary>
+        /// Resets the leading dimension slices of both given spans using the given
+        /// processor type.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type.</typeparam>
+        /// <typeparam name="TType">The processing type.</typeparam>
+        /// <param name="first">The first span to reset.</param>
+        /// <param name="second">The second span to reset.</param>
+        [MethodImpl(
+            MethodImplOptions.AggressiveInlining |
+            MethodImplOptions.AggressiveOptimization)]
+        private void Reset<TProcessor, TType>(Span<TType> first, Span<TType> second)
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+        {
+            var resetter = TProcessor.New();
+
+            // Visit one element of each span per dimension slice
+            int numSlices = NumDimensionSlices;
+            for (int slice = 0; slice < numSlices; ++slice)
+            {
+                resetter.Reset(out first.GetItemRef(slice));
+                resetter.Reset(out second.GetItemRef(slice));
+            }
+        }
+
+        /// <summary>
+        /// Adds the elements of the first source span to the first target span and the
+        /// elements of the second source span to the second target span.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type.</typeparam>
+        /// <typeparam name="TType">The processing type.</typeparam>
+        /// <param name="firstTarget">The first target span to accumulate into.</param>
+        /// <param name="secondTarget">The second target span to accumulate into.</param>
+        /// <param name="firstSource">
+        /// The first source span to get the intermediate results from.
+        /// </param>
+        /// <param name="secondSource">
+        /// The second source span to get the intermediate results from.
+        /// </param>
+        [MethodImpl(
+            MethodImplOptions.AggressiveInlining |
+            MethodImplOptions.AggressiveOptimization)]
+        private void Accumulate<TProcessor, TType>(
+            Span<TType> firstTarget,
+            Span<TType> secondTarget,
+            ReadOnlySpan<TType> firstSource,
+            ReadOnlySpan<TType> secondSource)
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+        {
+            var accumulator = TProcessor.New();
+
+            // Visit one element of each span per dimension slice
+            int numSlices = NumDimensionSlices;
+            for (int slice = 0; slice < numSlices; ++slice)
+            {
+                ref var firstSum = ref firstTarget.GetItemRef(slice);
+                accumulator.Accumulate(ref firstSum, firstSource.GetItemRef(slice));
+
+                ref var secondSum = ref secondTarget.GetItemRef(slice);
+                accumulator.Accumulate(ref secondSum, secondSource.GetItemRef(slice));
+            }
+        }
+
+        /// <summary>
+        /// Turns the accumulated values stored in the given spans into averages by
+        /// dividing them by their number of contributors.
+        /// </summary>
+        /// <typeparam name="TProcessor">The processor type.</typeparam>
+        /// <typeparam name="TType">The processing type.</typeparam>
+        /// <param name="first">The first span to compute the average for.</param>
+        /// <param name="second">The second span to compute the average for.</param>
+        /// <param name="numContributors">
+        /// The number of contributors representing the denominator of the first span.
+        /// Values smaller than one are raised to one.
+        /// </param>
+        /// <param name="numContributorsSecond">
+        /// The (optional) number of contributors representing the denominator of the
+        /// second span. If the number is not provided, the number will be equal to the
+        /// first number of contributors. Values smaller than one are raised to one.
+        /// </param>
+        [MethodImpl(
+            MethodImplOptions.AggressiveInlining |
+            MethodImplOptions.AggressiveOptimization)]
+        private void ComputeAverage<TProcessor, TType>(
+            Span<TType> first,
+            Span<TType> second,
+            T numContributors,
+            T? numContributorsSecond = null)
+            where TProcessor : struct, IProcessor<TProcessor, TType>
+            where TType : unmanaged
+        {
+            var averager = TProcessor.New();
+
+            // Clamp both denominators to at least one; the second one falls back to
+            // the already clamped first one if it has not been specified
+            var firstCount = T.Max(numContributors, T.One);
+            var secondCount = numContributorsSecond.HasValue
+                ? numContributorsSecond.Value
+                : firstCount;
+            secondCount = T.Max(secondCount, T.One);
+
+            // Visit one element of each span per dimension slice
+            int numSlices = NumDimensionSlices;
+            for (int slice = 0; slice < numSlices; ++slice)
+            {
+                averager.ComputeAverage(ref first.GetItemRef(slice), firstCount);
+                averager.ComputeAverage(ref second.GetItemRef(slice), secondCount);
+            }
+        }
+
+        /// <summary>
+        /// Optimize the given objective function using delegates and a general
+        /// comparison delegate.
+        /// </summary>
+        /// <param name="evalFunction">The evaluation function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="comparison">
+        /// The comparison functionality comparing evaluation results. The current
+        /// result is considered better if comparing it to the proposed result yields
+        /// a value greater than or equal to zero.
+        /// </param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">The best known position span.</param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public (TEvalType Result, Memory<T> Position) Optimize(
+            CPUOptimizationFunction<T, TEvalType> evalFunction,
+            CPUOptimizationBreakFunction<TEvalType> breakFunction,
+            Comparison<TEvalType> comparison,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default)
+        {
+            // Map the three-way comparison result to a boolean decision
+            CPUEvaluationComparison<TEvalType> currentIsBetter =
+                (current, proposed) => comparison(current, proposed) >= 0;
+
+            var functions = new FunctionWrapper(
+                evalFunction,
+                breakFunction,
+                currentIsBetter);
+            return Optimize(functions, functions, bestResult, bestKnownPosition);
+        }
+
+        /// <summary>
+        /// Optimize the given objective function using delegates and a boolean
+        /// evaluation comparison delegate.
+        /// </summary>
+        /// <param name="evalFunction">The evaluation function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="evaluationComparison">
+        /// The comparison function comparing evaluation results.
+        /// </param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">The best known position span.</param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public (TEvalType Result, Memory<T> Position) Optimize(
+            CPUOptimizationFunction<T, TEvalType> evalFunction,
+            CPUOptimizationBreakFunction<TEvalType> breakFunction,
+            CPUEvaluationComparison<TEvalType> evaluationComparison,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default)
+        {
+            // A single wrapper serves as optimization function and break function
+            var functions = new FunctionWrapper(
+                evalFunction,
+                breakFunction,
+                evaluationComparison);
+            return Optimize(functions, functions, bestResult, bestKnownPosition);
+        }
+
+        /// <summary>
+        /// Optimize the given objective function using specialized optimization function
+        /// types and the no-operation position modifier.
+        /// </summary>
+        /// <typeparam name="TFunction">The optimization function type.</typeparam>
+        /// <typeparam name="TBreakFunction">The break function type.</typeparam>
+        /// <param name="optimizationFunction">The optimization function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">The best known position span.</param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        public (TEvalType Result, Memory<T> Position) Optimize<
+            TFunction,
+            TBreakFunction>(
+            in TFunction optimizationFunction,
+            in TBreakFunction breakFunction,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default)
+            where TFunction : ICPUOptimizationFunction<T, TEvalType>
+            where TBreakFunction : ICPUOptimizationBreakFunction<TEvalType>
+        {
+            var nopModifier = CPUPositionModifier.GetNop<T>();
+            return Optimize(
+                optimizationFunction,
+                breakFunction,
+                nopModifier,
+                bestResult,
+                bestKnownPosition);
+        }
+
+        /// <summary>
+        /// Optimize the given objective function using specialized optimization function
+        /// types and a custom position modifier. The given function is wrapped such
+        /// that it can be passed to the intermediate-state-based overload.
+        /// </summary>
+        /// <typeparam name="TFunction">The optimization function type.</typeparam>
+        /// <typeparam name="TBreakFunction">The break function type.</typeparam>
+        /// <typeparam name="TModifier">The position modifier type.</typeparam>
+        /// <param name="optimizationFunction">The optimization function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="positionModifier">
+        /// The position modifier to apply to all position updates during optimization.
+        /// </param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">The best known position span.</param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        public (TEvalType Result, Memory<T> Position) Optimize<
+            TFunction,
+            TBreakFunction,
+            TModifier>(
+            in TFunction optimizationFunction,
+            in TBreakFunction breakFunction,
+            in TModifier positionModifier,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default)
+            where TFunction : ICPUOptimizationFunction<T, TEvalType>
+            where TBreakFunction : ICPUOptimizationBreakFunction<TEvalType>
+            where TModifier : ICPUPositionModifier<T>
+        {
+            // Adapt the function to the interface using intermediate states
+            var adaptedFunction =
+                new CachedOptimizationFunction<TFunction>(optimizationFunction);
+
+            return Optimize<
+                CachedOptimizationFunction<TFunction>,
+                object,
+                TBreakFunction,
+                TModifier>(
+                adaptedFunction,
+                breakFunction,
+                positionModifier,
+                bestResult,
+                bestKnownPosition);
+        }
+
+        /// <summary>
+        /// Optimize the given objective function using specialized optimization function
+        /// types that operate on intermediate states. This abstract overload has to be
+        /// implemented by derived optimizer classes.
+        /// </summary>
+        /// <typeparam name="TFunction">The optimization function type.</typeparam>
+        /// <typeparam name="TIntermediate">
+        /// The intermediate optimization state type (a reference type).
+        /// </typeparam>
+        /// <typeparam name="TBreakFunction">The break function type.</typeparam>
+        /// <typeparam name="TModifier">The position modifier type.</typeparam>
+        /// <param name="optimizationFunction">The optimization function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="positionModifier">
+        /// The position modifier to apply to all position updates during optimization.
+        /// </param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">
+        /// The (optional) best known position span.
+        /// </param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        public abstract (TEvalType Result, Memory<T> Position) Optimize<
+            TFunction,
+            TIntermediate,
+            TBreakFunction,
+            TModifier>(
+            in TFunction optimizationFunction,
+            in TBreakFunction breakFunction,
+            in TModifier positionModifier,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default)
+            where TFunction : ICPUOptimizationFunction<T, TEvalType, TIntermediate>
+            where TIntermediate : class
+            where TBreakFunction : ICPUOptimizationBreakFunction<TEvalType>
+            where TModifier : ICPUPositionModifier<T>;
+
+        /// <summary>
+        /// Optimizes the given objective function using delegate-based callbacks. This
+        /// overload uses raw optimization function callbacks to implement extremely
+        /// customizable optimization functions on top of the current stack.
+        /// </summary>
+        /// <param name="optimizationFunction">The optimization function.</param>
+        /// <param name="breakFunction">The break function.</param>
+        /// <param name="evaluationComparison">
+        /// The comparison function comparing evaluation results.
+        /// </param>
+        /// <param name="bestResult">The best known input result.</param>
+        /// <param name="bestKnownPosition">The best known position (if any).</param>
+        /// <returns>
+        /// A tuple consisting of the best found result and position vector.
+        /// </returns>
+        public abstract (TEvalType Result, Memory<T> Position) OptimizeRaw(
+            RawCPUOptimizationFunction<T, TEvalType> optimizationFunction,
+            CPUOptimizationBreakFunction<TEvalType> breakFunction,
+            CPUEvaluationComparison<TEvalType> evaluationComparison,
+            TEvalType bestResult,
+            ReadOnlyMemory<T>? bestKnownPosition = default);
+
+        /// <summary>
+        /// Transfers the contents of the current positions into the next positions.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void CopyPositions()
+        {
+            var source = positions.AsSpan();
+            var target = nextPositions.AsSpan();
+            source.CopyTo(target);
+        }
+
+        /// <summary>
+        /// Shuffles all internal index arrays in place (Fisher-Yates shuffle).
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void Permute()
+        {
+            // Iterate down to i == 1 to make all permutations reachable (unbiased shuffle)
+            for (int i = NumPlayers - 1; i > 0; --i)
+            {
+                int j = random.Next(i + 1);
+                Utilities.Swap(ref indices[i], ref indices[j]);
+            }
+            for (int i = M - 1; i > 0; --i)
+            {
+                int j = random.Next(i + 1);
+                Utilities.Swap(
+                    ref randomOffensiveIndices[i],
+                    ref randomOffensiveIndices[j]);
+
+                int k = random.Next(i + 1);
+                Utilities.Swap(
+                    ref randomDefensiveIndices[i],
+                    ref randomDefensiveIndices[k]);
+            }
+        }
+
+        /// <summary>
+        /// Initializes the internal SOG list for the current iteration.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void InitSOGList()
+        {
+            // Atomically reset the list counter; the list is only cleared in DEBUG builds
+            Interlocked.Exchange(ref sogListCounter, 0);
+#if DEBUG
+            Array.Clear(sogList);
+#endif
+        }
+
+        /// <summary>
+        /// Exchanges all current and next buffers to prepare the next iteration.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void SwapBuffers()
+        {
+            // Exchange the current and next SDG and SOG vectors
+            Utilities.Swap(ref sdg, ref nextSDG);
+            Utilities.Swap(ref sog, ref nextSOG);
+
+            // Exchange the current and next positions
+            Utilities.Swap(ref positions, ref nextPositions);
+        }
+
+        #endregion
+    }
+}
+
+#endif
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs b/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs
new file mode 100644
index 0000000000..9801b56223
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/OptimizationFunction.cs
@@ -0,0 +1,142 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: OptimizationFunction.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU.Util;
+using System;
+using System.Numerics;
+using System.Threading.Tasks;
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    /// <summary>
+    /// Represents a generic optimization function to be used with CPU-specific parts
+    /// of the optimization library.
+    /// </summary>
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public delegate TEvalType CPUOptimizationFunction<T, out TEvalType>(
+        ReadOnlySpan<T> position)
+        where T : struct
+        where TEvalType : struct, IEquatable<TEvalType>;
+
+    /// <summary>
+    /// A raw optimization function operating on all positions and evaluation values
+    /// directly to implement specialized and highly domain-specific evaluators.
+    /// </summary>
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    /// <param name="allPositions">
+    /// A memory instance pointing to all packed position vectors of all particles.
+    /// </param>
+    /// <param name="evaluations">
+    /// A memory instance pointing to all evaluation values of all particles.
+    /// </param>
+    /// <param name="numDimensions">The number of dimensions.</param>
+    /// <param name="numPaddedDimensions">
+    /// The number of padded dimensions taking vectorization into account.
+    /// </param>
+    /// <param name="numParticles">The number of particles.</param>
+    /// <param name="positionStride">
+    /// The position stride to be used to compute individual vector elements. In this
+    /// scope, the X dimension refers to the number of players and the Y dimension
+    /// is equal to the number of padded dimensions.
+    /// </param>
+    /// <param name="options">
+    /// Parallel processing options to be used if further parallel processing is desired.
+    /// </param>
+    public delegate void RawCPUOptimizationFunction<T, TEvalType>(
+        Memory<T> allPositions,
+        Memory<TEvalType> evaluations,
+        int numDimensions,
+        int numPaddedDimensions,
+        int numParticles,
+        Stride2D.DenseY positionStride,
+        ParallelOptions options);
+
+    /// <summary>
+    /// A custom break function to break the optimization loop at some point. Returns
+    /// true if the optimization loop should be stopped.
+    /// </summary>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public delegate bool CPUOptimizationBreakFunction<TEvalType>(
+        TEvalType evalType,
+        int iteration);
+
+    /// <summary>
+    /// Represents a comparison function operating on evaluation types. If the first
+    /// value is considered to be better than the second one, true will be returned by
+    /// this function.
+    /// </summary>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public delegate bool CPUEvaluationComparison<TEvalType>(
+        TEvalType first,
+        TEvalType second);
+
+    /// <summary>
+    /// An abstract optimization function to be used with CPU-specific optimizers.
+    /// </summary>
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public interface ICPUOptimizationFunction<T, TEvalType> :
+        IBaseOptimizationFunction<TEvalType>
+        where T : struct
+        where TEvalType : struct, IEquatable<TEvalType>
+    {
+        /// <summary>
+        /// Evaluates the given position vector.
+        /// </summary>
+        /// <param name="position">The position span.</param>
+        /// <returns>The resulting evaluation value.</returns>
+        TEvalType Evaluate(ReadOnlySpan<T> position);
+    }
+
+    /// <summary>
+    /// An abstract CPU-specific optimization function operating on intermediate states.
+    /// </summary>
+    /// <typeparam name="T">The main element type for all position vectors.</typeparam>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    /// <typeparam name="TIntermediate">
+    /// The type of all intermediate states during processing.
+    /// </typeparam>
+    public interface ICPUOptimizationFunction<T, TEvalType, TIntermediate> :
+        IBaseOptimizationFunction<TEvalType>,
+        IParallelCache<TIntermediate>
+        where T : struct
+        where TIntermediate : class
+        where TEvalType : struct, IEquatable<TEvalType>
+    {
+        /// <summary>
+        /// Evaluates the given position vector using the given intermediate state.
+        /// </summary>
+        /// <param name="position">The position span.</param>
+        /// <param name="intermediateState">The intermediate processing state.</param>
+        /// <returns>The resulting evaluation value.</returns>
+        TEvalType Evaluate(ReadOnlySpan<T> position, TIntermediate intermediateState);
+    }
+
+    /// <summary>
+    /// An abstract optimizer break logic to realize custom iteration logic.
+    /// </summary>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public interface ICPUOptimizationBreakFunction<TEvalType>
+        where TEvalType : struct
+    {
+        /// <summary>
+        /// Tests the given evaluation type and the current iteration to enable the
+        /// implementation of custom optimizer break functionality and returns true if
+        /// the current optimizer process should be terminated.
+        /// </summary>
+        /// <param name="evalType">The best found evaluation result so far.</param>
+        /// <param name="iteration">The current solver iteration.</param>
+        /// <returns>True if the current optimizer process should be terminated.</returns>
+        bool Break(TEvalType evalType, int iteration);
+    }
+}
diff --git a/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs b/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs
new file mode 100644
index 0000000000..1cc3f41f44
--- /dev/null
+++ b/Src/ILGPU.Algorithms/Optimization/CPU/PositionModifier.cs
@@ -0,0 +1,162 @@
+// ---------------------------------------------------------------------------------------
+//                                   ILGPU Algorithms
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: PositionModifier.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace ILGPU.Algorithms.Optimization.CPU
+{
+    /// <summary>
+    /// Represents an abstract modifier for player/particle positions during optimization.
+    /// This allows users to implement specific clamping, rounding, or adjustments
+    /// during an optimization run.
+    /// </summary>
+    /// <typeparam name="T">The element type.</typeparam>
+    public interface ICPUPositionModifier<T>
+        where T : unmanaged
+    {
+        /// <summary>
+        /// Adjusts the given player/particle position according to user- and domain-
+        /// specific constraints.
+        /// </summary>
+        /// <param name="index">The current player/particle index.</param>
+        /// <param name="position">The position to adjust (if desired).</param>
+        /// <param name="numDimensions">The raw dimensions of the input problem.</param>
+        /// <param name="numPaddedDimensions">The padded number of dimensions.</param>
+        /// <remarks>
+        /// The length of the position memory will be equal to the input problem
+        /// dimension in case of a scalar optimizer. If the optimizer has been created
+        /// for vector-based execution, the position memory length will be padded
+        /// according to the vector length. If you want to use vector instructions inside
+        /// this function, make sure to create a vectorized optimizer or account for
+        /// non-optimized memory lengths.
+        /// </remarks>
+        void AdjustPosition(
+            int index,
+            Memory<T> position,
+            int numDimensions,
+            int numPaddedDimensions);
+    }
+
+    /// <summary>
+    /// Static utility class for <see cref="ICPUPositionModifier{T}"/> interfaces.
+    /// </summary>
+    public static class CPUPositionModifier
+    {
+        /// <summary>
+        /// Represents a nop position modifier.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        public readonly struct Nop<T> : ICPUPositionModifier<T>
+            where T : unmanaged
+        {
+            /// <summary>
+            /// Does not perform any operation.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void AdjustPosition(
+                int index,
+                Memory<T> position,
+                int numDimensions,
+                int numPaddedDimensions)
+            { }
+        }
+
+        /// <summary>
+        /// Rounds single-precision floating point values to the given number of digits.
+        /// </summary>
+        /// <param name="NumDigits">The number of digits to round to.</param>
+        /// <param name="MidpointRounding">The midpoint rounding mode.</param>
+        public readonly record struct FloatRoundingModifier(
+            int NumDigits,
+            MidpointRounding MidpointRounding) :
+            ICPUPositionModifier<float>
+        {
+            /// <summary>
+            /// Rounds the first numDimensions elements of the position in place.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void AdjustPosition(
+                int index,
+                Memory<float> position,
+                int numDimensions,
+                int numPaddedDimensions)
+            {
+                var values = position.Span;
+                for (int dim = 0; dim < numDimensions; ++dim)
+                    values[dim] = XMath.Round(values[dim], NumDigits, MidpointRounding);
+            }
+        }
+
+        /// <summary>
+        /// Rounds double-precision floating point values to the given number of digits.
+        /// </summary>
+        /// <param name="NumDigits">The number of digits to round to.</param>
+        /// <param name="MidpointRounding">The midpoint rounding mode.</param>
+        public readonly record struct DoubleRoundingModifier(
+            int NumDigits,
+            MidpointRounding MidpointRounding) :
+            ICPUPositionModifier<double>
+        {
+            /// <summary>
+            /// Rounds the first numDimensions elements of the position in place.
+            /// </summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void AdjustPosition(
+                int index,
+                Memory<double> position,
+                int numDimensions,
+                int numPaddedDimensions)
+            {
+                var values = position.Span;
+                for (int dim = 0; dim < numDimensions; ++dim)
+                    values[dim] = XMath.Round(values[dim], NumDigits, MidpointRounding);
+            }
+        }
+
+        /// <summary>
+        /// Creates a position modifier that does not perform any operation.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <returns>A new Nop position modifier.</returns>
+        public static Nop<T> GetNop<T>() where T : unmanaged => new Nop<T>();
+
+        /// <summary>
+        /// Creates a rounding modifier operating on float positions.
+        /// </summary>
+        /// <param name="numDigits">The number of digits to round to.</param>
+        /// <param name="midpointRounding">The midpoint rounding mode.</param>
+        /// <returns>A new rounding modifier.</returns>
+        public static FloatRoundingModifier GetFloatRounding(
+            int numDigits,
+            MidpointRounding midpointRounding = MidpointRounding.ToEven)
+        {
+            return numDigits >= 0
+                ? new FloatRoundingModifier(numDigits, midpointRounding)
+                : throw new ArgumentOutOfRangeException(nameof(numDigits));
+        }
+
+        /// <summary>
+        /// Creates a rounding modifier operating on double positions.
+        /// </summary>
+        /// <param name="numDigits">The number of digits to round to.</param>
+        /// <param name="midpointRounding">The midpoint rounding mode.</param>
+        /// <returns>A new rounding modifier.</returns>
+        public static DoubleRoundingModifier GetDoubleRounding(
+            int numDigits,
+            MidpointRounding midpointRounding = MidpointRounding.ToEven)
+        {
+            return numDigits >= 0
+                ? new DoubleRoundingModifier(numDigits, midpointRounding)
+                : throw new ArgumentOutOfRangeException(nameof(numDigits));
+        }
+    }
+}
\ No newline at end of file
diff --git a/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs b/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs
index d5e2ad0b12..bbaf7245be 100644
--- a/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs
+++ b/Src/ILGPU.Algorithms/Optimization/IOptimizationFunction.cs
@@ -13,10 +13,28 @@
 using System;
 using System.Numerics;
 
-#if NET7_0_OR_GREATER
-
 namespace ILGPU.Algorithms.Optimization
 {
+    /// <summary>
+    /// An abstract optimization function supporting comparisons between evaluation types.
+    /// </summary>
+    /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
+    public interface IBaseOptimizationFunction<TEvalType>
+        where TEvalType : struct, IEquatable<TEvalType>
+    {
+        /// <summary>
+        /// Compares the current evaluation value with the proposed one and returns true
+        /// if the current one is considered better in any way.
+        /// </summary>
+        /// <param name="current">The currently known value.</param>
+        /// <param name="proposed">The proposed evaluation value.</param>
+        /// <returns>
+        /// True if the current value is considered better than the proposed value.
+        /// </returns>
+        bool CurrentIsBetter(TEvalType current, TEvalType proposed);
+    }
+
+#if NET7_0_OR_GREATER
     /// <summary>
     /// A generic optimization function that defines the objective of an optimization
     /// process using evaluation and comparison methods.
@@ -24,7 +42,8 @@ namespace ILGPU.Algorithms.Optimization
     /// <typeparam name="TNumericType">The vectorized numeric type.</typeparam>
     /// <typeparam name="TElementType">The element type of a numeric type.</typeparam>
     /// <typeparam name="TEvalType">The evaluation data type.</typeparam>
-    public interface IOptimizationFunction<TNumericType, TElementType, TEvalType>
+    public interface IOptimizationFunction<TNumericType, TElementType, TEvalType> :
+        IBaseOptimizationFunction<TEvalType>
         where TNumericType : unmanaged, IVectorType<TNumericType, TElementType>
         where TElementType : unmanaged, INumber<TElementType>
         where TEvalType : unmanaged, IEquatable<TEvalType>
@@ -43,18 +62,7 @@ TEvalType Evaluate(
             LongIndex1D index,
             Index1D dimension,
             SingleVectorView<TNumericType> positionView);
-
-        /// <summary>
-        /// Compares the current evaluation value with the proposed one and returns true
-        /// if the current one is considered better in any way.
-        /// </summary>
-        /// <param name="current">The currently known value.</param>
-        /// <param name="proposed">The proposed evaluation value.</param>
-        /// <returns>
-        /// True if the current value is considered better than the proposed value.
-        /// </returns>
-        bool CurrentIsBetter(TEvalType current, TEvalType proposed);
     }
+#endif
 }
 
-#endif
diff --git a/Src/ILGPU/Util/ParallelCache.cs b/Src/ILGPU/Util/ParallelCache.cs
new file mode 100644
index 0000000000..ecb4adbb96
--- /dev/null
+++ b/Src/ILGPU/Util/ParallelCache.cs
@@ -0,0 +1,378 @@
+// ---------------------------------------------------------------------------------------
+//                                        ILGPU
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: ParallelCache.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+
+namespace ILGPU.Util
+{
+    /// <summary>
+    /// Represents a parallel object cache to be used in combination with a
+    /// <see cref="Parallel"/> for implementation to avoid unnecessary temporary object
+    /// creation.
+    /// </summary>
+    /// <typeparam name="T">The type of the elements to cache.</typeparam>
+    public abstract class ParallelCache<T> : DisposeBase, IParallelCache<T>
+        where T : class
+    {
+        #region Instance
+
+        private InlineList<T> cache;
+        private InlineList<T> used;
+
+        /// <summary>
+        /// Creates a new parallel cache.
+        /// </summary>
+        /// <param name="initialCapacity">
+        /// The initial list capacity (defaults to twice the number of processors).
+        /// </param>
+        protected ParallelCache(int? initialCapacity = null)
+        {
+            // Both internal lists start with the same number of preallocated entries
+            int numEntries = initialCapacity ?? Environment.ProcessorCount * 2;
+            used = InlineList<T>.Create(numEntries);
+            cache = InlineList<T>.Create(numEntries);
+            LocalFinalizer = FinishProcessing;
+            LocalInitializer = GetOrCreate;
+        }
+
+        #endregion
+
+        #region Properties
+
+        /// <summary>
+        /// Returns the underlying sync root object.
+        /// </summary>
+        public object SyncRoot { get; } = new object();
+
+        /// <summary>
+        /// Returns the local initializer function.
+        /// </summary>
+        public Func<T> LocalInitializer { get; }
+
+        /// <summary>
+        /// Returns the local finalizer action.
+        /// </summary>
+        public Action<T> LocalFinalizer { get; }
+
+        /// <summary>
+        /// Returns the underlying used intermediates.
+        /// </summary>
+        protected ReadOnlySpan<T> Used => used;
+
+        #endregion
+
+        #region Methods
+
+        /// <summary>
+        /// Initializes this parallel cache for the next parallel operation.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void InitializeProcessing()
+        {
+            // This method does not perform an operation at the moment but this may
+            // change in the future. For this reason, this (empty) method remains here
+            // and should be called in all cases prior to calling GetOrCreate().
+        }
+
+        /// <summary>
+        /// Gets a cached intermediate state or creates a new one, initializes it and
+        /// registers it in the list of used intermediate states.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public T GetOrCreate()
+        {
+            // Try to reuse a previously cached intermediate state
+            T intermediate;
+            lock (SyncRoot)
+            {
+                if (cache.Count > 0)
+                {
+                    int lastIndex = cache.Count - 1;
+                    intermediate = cache[lastIndex];
+                    cache.RemoveAt(lastIndex);
+                }
+                else
+                {
+                    // Create a new intermediate result
+                    intermediate = CreateIntermediate();
+                }
+            }
+
+            // Initialize the intermediate state (outside of the lock)
+            InitializeIntermediate(intermediate);
+
+            // Add to our list of used intermediates
+            lock (SyncRoot)
+                used.Add(intermediate);
+
+            return intermediate;
+        }
+
+        /// <summary>
+        /// Finishes a parallel processing step.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void FinishProcessing()
+        {
+            // Return all used intermediates to the cache. NOTE(review): not guarded by
+            // SyncRoot (unlike GetOrCreate) - confirm that no workers run concurrently
+            cache.AddRange(used);
+            used.Clear();
+        }
+
+        /// <summary>
+        /// Creates a new intermediate instance without initializing it properly.
+        /// </summary>
+        /// <returns>The created intermediate state.</returns>
+        protected abstract T CreateIntermediate();
+
+        /// <summary>
+        /// Initializes the given intermediate state in order to prepare it for
+        /// processing.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to prepare.</param>
+        protected virtual void InitializeIntermediate(T intermediateState) { }
+
+        /// <summary>
+        /// Finishes processing of the current thread while getting an intermediate state.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to operate on.</param>
+        protected virtual void FinishProcessing(T intermediateState) { }
+
+        #endregion
+
+        #region IParallelCache
+
+        /// <summary>
+        /// Creates a new intermediate instance without initializing it properly.
+        /// </summary>
+        /// <returns>The created intermediate state.</returns>
+        T IParallelCache<T>.CreateIntermediate() => CreateIntermediate();
+
+        /// <summary>
+        /// Initializes the given intermediate state in order to prepare it for
+        /// processing.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to prepare.</param>
+        void IParallelCache<T>.InitializeIntermediate(T intermediateState) =>
+            InitializeIntermediate(intermediateState);
+
+        /// <summary>
+        /// Finishes processing of the current thread while getting an intermediate state.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to operate on.</param>
+        void IParallelCache<T>.FinishProcessing(T intermediateState) =>
+            FinishProcessing(intermediateState);
+
+        #endregion
+
+        #region IDisposable
+
+        /// <summary>
+        /// Disposes all cached intermediate states if the element type is disposable.
+        /// </summary>
+        protected override void Dispose(bool disposing)
+        {
+            bool isDisposable = typeof(IDisposable).IsAssignableFrom(typeof(T));
+            if (isDisposable && cache.Count > 0)
+            {
+                foreach (var state in cache)
+                    state.AsNotNullCast<IDisposable>().Dispose();
+            }
+            base.Dispose(disposing);
+        }
+
+        #endregion
+    }
+
+    /// <summary>
+    /// An abstract parallel cache interface operating on intermediate states.
+    /// </summary>
+    /// <typeparam name="T">The type of all intermediate states.</typeparam>
+    public interface IParallelCache<T>
+    {
+        /// <summary>
+        /// Creates a new intermediate instance without initializing it properly.
+        /// </summary>
+        /// <returns>The created intermediate state.</returns>
+        T CreateIntermediate();
+
+        /// <summary>
+        /// Initializes the given intermediate state in order to prepare it for
+        /// processing.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to prepare.</param>
+        void InitializeIntermediate(T intermediateState);
+
+        /// <summary>
+        /// Finishes processing of the current thread while getting an intermediate state.
+        /// </summary>
+        /// <param name="intermediateState">The intermediate state to operate on.</param>
+        void FinishProcessing(T intermediateState);
+    }
+
+    /// <summary>
+    /// An abstract parallel processing body representing a function to be executed
+    /// concurrently on a given value range. It operates on intermediate values that are
+    /// managed by its surrounding processing cache.
+    /// </summary>
+    /// <typeparam name="T">The type of all intermediate states.</typeparam>
+    public interface IParallelProcessingBody<T>
+        where T : class
+    {
+        /// <summary>
+        /// Initializes this processing body to prepare the upcoming parallel processing
+        /// steps.
+        /// </summary>
+        void Initialize();
+
+        /// <summary>
+        /// Processes a single element concurrently while accepting an intermediate state
+        /// on which this body operates.
+        /// </summary>
+        /// <param name="index">The current processing element index.</param>
+        /// <param name="loopState">The parallel loop state (if any).</param>
+        /// <param name="intermediateState">
+        /// The current intermediate state for this thread.
+        /// </param>
+        void Process(
+            int index,
+            ParallelLoopState? loopState,
+            T intermediateState);
+
+        /// <summary>
+        /// Finalizes the current body operation while having the ability to inspect all
+        /// previously used intermediate states.
+        /// </summary>
+        /// <param name="intermediateStates">
+        /// A span referring to all previously used intermediate states.
+        /// </param>
+        void Finalize(ReadOnlySpan<T> intermediateStates);
+    }
+
+    /// <summary>
+    /// Static helpers for parallel processing extensions.
+    /// </summary>
+    public static class ParallelProcessing
+    {
+        /// <summary>
+        /// Gets or sets whether debug mode is enabled. Note that this flag needs to be
+        /// changed before the first <see cref="ParallelProcessingCache{T,TBody}"/>
+        /// instance has been created since the flag is cached locally to enable JIT
+        /// optimizations.
+        /// </summary>
+        public static bool DebugMode { get; set; }
+    }
+
+    /// <summary>
+    /// Caches intermediate objects for <see cref="Parallel"/> for loops to avoid
+    /// creating temporary objects repeatedly. Iterations are dispatched to a specialized
+    /// body instance to avoid virtual function calls in each processing step.
+    /// </summary>
+    /// <typeparam name="T">The type of the elements to cache.</typeparam>
+    /// <typeparam name="TBody">The type of the custom loop body instance.</typeparam>
+    public abstract class ParallelProcessingCache<T, TBody> : ParallelCache<T>
+        where T : class
+        where TBody : IParallelProcessingBody<T>
+    {
+        /// <summary>
+        /// Cached copy of <see cref="ParallelProcessing.DebugMode"/>. If set, all
+        /// iterations run sequentially on the calling thread instead of in parallel.
+        /// </summary>
+        private static readonly bool DebugMode = ParallelProcessing.DebugMode;
+
+        private readonly Func<int, ParallelLoopState?, T, T> loopBody;
+        private readonly TBody bodyImplementation;
+        private readonly ParallelOptions defaultOptions = new();
+
+        /// <summary>
+        /// Constructs a new processing cache and creates its loop body instance.
+        /// </summary>
+        /// <param name="initialCapacity">The initial number of threads (if any).</param>
+        [SuppressMessage(
+            "Usage",
+            "CA2214:Do not call overridable methods in constructors",
+            Justification = "This method is called here as it represents an abstract " +
+                "static factory method")]
+        protected ParallelProcessingCache(int? initialCapacity = null)
+            : base(initialCapacity)
+        {
+            bodyImplementation = CreateBody();
+            loopBody = ProcessAndForward;
+        }
+
+        /// <summary>
+        /// Factory method creating the processing body this cache operates on.
+        /// </summary>
+        /// <returns>The processing body to use.</returns>
+        protected abstract TBody CreateBody();
+
+        /// <summary>
+        /// Runs the processing body on the given index range using default options.
+        /// </summary>
+        /// <param name="fromInclusive">The inclusive start index.</param>
+        /// <param name="toExclusive">The exclusive end index.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void ParallelFor(int fromInclusive, int toExclusive)
+        {
+            ParallelFor(fromInclusive, toExclusive, defaultOptions);
+        }
+
+        /// <summary>
+        /// Runs the processing body on all indices of the given range.
+        /// </summary>
+        /// <param name="fromInclusive">The inclusive start index.</param>
+        /// <param name="toExclusive">The exclusive end index.</param>
+        /// <param name="options">The parallel options (unused in debug mode).</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void ParallelFor(
+            int fromInclusive,
+            int toExclusive,
+            ParallelOptions options)
+        {
+            // Prepare the cache first and the body afterwards for this run
+            InitializeProcessing();
+            bodyImplementation.Initialize();
+
+            if (!DebugMode)
+            {
+                Parallel.For(
+                    fromInclusive, toExclusive, options,
+                    LocalInitializer, loopBody, LocalFinalizer);
+            }
+            else
+            {
+                // Debug mode: visit all indices in order using a single state
+                var state = GetOrCreate();
+                for (int index = fromInclusive; index < toExclusive; ++index)
+                {
+                    loopBody(index, null, state);
+                }
+            }
+
+            // Hand all used states to the body before completing this run
+            bodyImplementation.Finalize(Used);
+            FinishProcessing();
+        }
+
+        /// <summary>
+        /// Forwards a single iteration to the body and passes the state through.
+        /// </summary>
+        private T ProcessAndForward(int index, ParallelLoopState? loopState, T state)
+        {
+            bodyImplementation.Process(index, loopState, state);
+            return state;
+        }
+    }
+}