From 86ac67c2dc796a2e61b665412b209c3935f87ff8 Mon Sep 17 00:00:00 2001 From: Excel-lent <99126980+Excel-lent@users.noreply.github.com> Date: Wed, 11 May 2022 19:32:41 +0200 Subject: [PATCH] Disposing functions are added. Description is enhanced. --- C#/ClooWrapperVBA/ProgramDevice.cs | 133 ++++++++++++++++++++++++----- C#/Installation script.iss | 2 +- Excel/Asynchronous.bas | 27 ++++-- Excel/Performance.bas | 89 ++++++++++++------- Readme.md | 24 ++++-- 5 files changed, 203 insertions(+), 72 deletions(-) diff --git a/C#/ClooWrapperVBA/ProgramDevice.cs b/C#/ClooWrapperVBA/ProgramDevice.cs index 688ac31..d91d9f1 100644 --- a/C#/ClooWrapperVBA/ProgramDevice.cs +++ b/C#/ClooWrapperVBA/ProgramDevice.cs @@ -1,7 +1,6 @@ using Cloo; using System; using System.Collections.Generic; -using System.ComponentModel; using System.Runtime.InteropServices; using System.Threading; @@ -175,16 +174,42 @@ bool ExecuteAsync(ref int[] globalWorkOffset, ref int[] globalWorkSize, ref int[ #endregion GetArguments + #region Destructors + /// - /// Device type of used device ("GPU" / "CPU"). + /// Disposes kernel memory variable. /// + /// 0-based index of argument in argument list. + /// True, if the operation was successful, false otherwise. [DispId(16)] + bool ReleaseMemObject(int argument_index); + + /// + /// Disposes kernel. + /// + /// True, if the operation was successful, false otherwise. + [DispId(17)] + bool ReleaseKernel(); + + /// + /// Disposes ComputeProgram, ComputeCommandQueue and CommandQueue. + /// + /// True, if the operation was successful, false otherwise. + [DispId(18)] + bool ReleaseProgram(); + + #endregion Destructors + + /// + /// Device type of used device ("GPU" / "CPU"). + /// + [DispId(19)] string DeviceType { get; set; } /// /// Error string. 
/// - [DispId(17)] + [DispId(20)] string ErrorString { get; set; } } @@ -193,7 +218,7 @@ bool ExecuteAsync(ref int[] globalWorkOffset, ref int[] globalWorkSize, ref int[ [ClassInterface(ClassInterfaceType.None)] public class ProgramDevice : IProgramDevice { - public ComputeProgram Prog; + public ComputeProgram ComputeProgram; public ComputeContext ComputeContext; public ComputeCommandQueue ComputeCommandQueue = null; private ComputeKernel kernel; @@ -212,7 +237,7 @@ public bool CreateKernel(string method) { try { - kernel = Prog.CreateKernel(method); + kernel = ComputeProgram.CreateKernel(method); variablePointers = new Dictionary(); return true; } @@ -272,11 +297,11 @@ public bool Build(string sourceCode, string options, int platformIndex, int devi return false; } - Prog = new ComputeProgram(ComputeContext, sourceCode); + ComputeProgram = new ComputeProgram(ComputeContext, sourceCode); try { - Prog.Build(null, options, null, IntPtr.Zero); + ComputeProgram.Build(null, options, null, IntPtr.Zero); } catch (Exception e) { @@ -285,7 +310,7 @@ public bool Build(string sourceCode, string options, int platformIndex, int devi return false; } - buildLogs = Prog.GetBuildLog(ComputeContext.Devices[deviceTypeIndex]); + buildLogs = ComputeProgram.GetBuildLog(ComputeContext.Devices[deviceTypeIndex]); return true; } @@ -295,7 +320,7 @@ public bool Build(string sourceCode, string options, int platformIndex, int devi /// /// Writes an array of type "Long" to the device. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Array of "Long". /// True, if the operation was successful, false otherwise. public bool SetMemoryArgument_Long(int argument_index, ref int[] values) @@ -320,7 +345,7 @@ public bool SetMemoryArgument_Long(int argument_index, ref int[] values) /// /// Writes an array of type "Single" to the device. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Array of "Single". 
/// True, if the operation was successful, false otherwise. public bool SetMemoryArgument_Single(int argument_index, ref float[] values) @@ -345,7 +370,7 @@ public bool SetMemoryArgument_Single(int argument_index, ref float[] values) /// /// Writes an array of type "Double" to the device. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Array of "Double". /// True, if the operation was successful, false otherwise. public bool SetMemoryArgument_Double(int argument_index, ref double[] values) @@ -370,7 +395,7 @@ public bool SetMemoryArgument_Double(int argument_index, ref double[] values) /// /// Sets "Long" argument to the kernel. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Argument value as "Long". /// True, if the operation was successful, false otherwise. public bool SetValueArgument_Long(int argument_index, int value_long) @@ -395,7 +420,7 @@ public bool SetValueArgument_Long(int argument_index, int value_long) /// /// Sets "Single" argument to the kernel. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Argument value as "Single". /// True, if the operation was successful, false otherwise. public bool SetValueArgument_Single(int argument_index, float value_single) @@ -420,7 +445,7 @@ public bool SetValueArgument_Single(int argument_index, float value_single) /// /// Sets "Double" argument to the kernel. /// - /// The argument index. + /// 0-based index of argument in argument list. /// Argument value as "Double". /// True, if the operation was successful, false otherwise. public bool SetValueArgument_Double(int argument_index, double value_double) @@ -587,10 +612,10 @@ private bool InitGlobalArrays(ref int[] globalWorkOffset, ref int[] globalWorkSi /// /// Reads an array of type "Long" from the device. /// - /// 0-based number of argument in argument list. + /// 0-based index of argument in argument list. /// Array of "Long". 
/// False in case of error/exception. Otherwise true. - public bool GetMemoryArgument_Long(int varIndex, ref int[] values) + public bool GetMemoryArgument_Long(int argument_index, ref int[] values) { try { @@ -599,7 +624,7 @@ public bool GetMemoryArgument_Long(int varIndex, ref int[] values) fixed (int* p = (int[])values) { IntPtr ptr = (IntPtr)p; - ComputeCommandQueue.Read((ComputeBuffer)variablePointers[varIndex], true, 0L, values.Length, ptr, null); + ComputeCommandQueue.Read((ComputeBuffer)variablePointers[argument_index], true, 0L, values.Length, ptr, null); } } return true; @@ -615,10 +640,10 @@ public bool GetMemoryArgument_Long(int varIndex, ref int[] values) /// /// Reads an array of type "Single" from the device. /// - /// 0-based number of argument in argument list. + /// 0-based index of argument in argument list. /// Array of "Single". /// False in case of error/exception. Otherwise true. - public bool GetMemoryArgument_Single(int varIndex, ref float[] values) + public bool GetMemoryArgument_Single(int argument_index, ref float[] values) { try { @@ -627,7 +652,7 @@ public bool GetMemoryArgument_Single(int varIndex, ref float[] values) fixed (float* p = (float[])values) { IntPtr ptr = (IntPtr)p; - ComputeCommandQueue.Read((ComputeBuffer)variablePointers[varIndex], true, 0L, values.Length, ptr, null); + ComputeCommandQueue.Read((ComputeBuffer)variablePointers[argument_index], true, 0L, values.Length, ptr, null); } } return true; @@ -643,10 +668,10 @@ public bool GetMemoryArgument_Single(int varIndex, ref float[] values) /// /// Reads an array of type "Double" from the device. /// - /// 0-based number of argument in argument list. + /// 0-based index of argument in argument list. /// Array of "Double". /// False in case of error/exception. Otherwise true. 
- public bool GetMemoryArgument_Double(int varIndex, ref double[] values) + public bool GetMemoryArgument_Double(int argument_index, ref double[] values) { try { @@ -655,7 +680,7 @@ public bool GetMemoryArgument_Double(int varIndex, ref double[] values) fixed (double* p = (double[])values) { IntPtr ptr = (IntPtr)p; - ComputeCommandQueue.Read((ComputeBuffer)variablePointers[varIndex], true, 0L, values.Length, ptr, null); + ComputeCommandQueue.Read((ComputeBuffer)variablePointers[argument_index], true, 0L, values.Length, ptr, null); } } return true; @@ -670,6 +695,68 @@ public bool GetMemoryArgument_Double(int varIndex, ref double[] values) #endregion GetArguments + #region Destructors + + /// + /// Disposes kernel memory variable. + /// + /// 0-based index of argument in argument list. + /// True, if the operation was successful, false otherwise. + public bool ReleaseMemObject(int argument_index) + { + try + { + variablePointers[argument_index].Dispose(); + return true; + } + catch (Exception ex) + { + ErrorString += ex.Message; + return false; + } + } + + /// + /// Disposes kernel. + /// + /// True, if the operation was successful, false otherwise. + public bool ReleaseKernel() + { + try + { + kernel.Dispose(); + return true; + } + catch (Exception ex) + { + ErrorString += ex.Message; + return false; + } + } + + /// + /// Disposes ComputeProgram, ComputeCommandQueue and CommandQueue. + /// + /// True, if the operation was successful, false otherwise. + public bool ReleaseProgram() + { + try + { + ComputeProgram.Dispose(); + ComputeCommandQueue.Dispose(); + ComputeContext.Dispose(); + + return true; + } + catch (Exception ex) + { + ErrorString += ex.Message; + return false; + } + } + + #endregion Destructors + /// /// Device type of initialized device ("GPU" / "CPU"). 
/// diff --git a/C#/Installation script.iss b/C#/Installation script.iss index 4bb80c9..9d0298a 100644 --- a/C#/Installation script.iss +++ b/C#/Installation script.iss @@ -22,7 +22,7 @@ Name: "{app}\demo\cl"; Permissions: everyone-full Source: bin\ClooWrapperVBA.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\ClooWrapperVBA_x64.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\Cloo.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; -Source: ..\Excel\OpenCl v0.01.xlsm; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\OpenCl v0.02.xlsm; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: ..\Excel\cl\Performance.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: ..\Excel\cl\MatrixMultiplication.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: ..\Excel\Configuration.vbs; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; diff --git a/Excel/Asynchronous.bas b/Excel/Asynchronous.bas index 0dd193f..d565543 100644 --- a/Excel/Asynchronous.bas +++ b/Excel/Asynchronous.bas @@ -26,7 +26,7 @@ Sub MainLoop() While Not allTasks_Completed For i = 1 To progDevices.Count If progDevices.Item(i).ProgramDevice.ExecutionCompleted Then - Call progDevices.Item(i).ProgramDevice.GetMemoryArgument_Double(0, vecResp) ' Extract the results and do something with received data here. + result = progDevices.Item(i).ProgramDevice.GetMemoryArgument_Double(0, vecResp) ' Extract the results and do something with received data here. 
wsAsynchronous.Cells(logLine, 1) = "Task " & currentTaskId(i) & ", " & progDevices.Item(i).ProgramDevice.deviceType & _ progDevices.Item(i).DeviceId & ": completed" @@ -37,13 +37,13 @@ Sub MainLoop() ' Start new task If startedTasks < MAX_TASKS Then ReDim vecResp(UBound(vecResp)) ' Erase output vector. - Call progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) ' If you want to use callbacks, than use function below ' "CPU_Task_Completed" is a function that will obtain the callback. ' Call progDevices.Item(i).ProgramDevice.ExecuteAsync(globalWorkOffset, globalWorkSize, localWorkSize, THREAD_PRIORITY, AddressOf Asynchronous.CPU_Task_Completed) - Call progDevices.Item(i).ProgramDevice.ExecuteBackground(globalWorkOffset, globalWorkSize, localWorkSize, THREAD_PRIORITY) + result = progDevices.Item(i).ProgramDevice.ExecuteBackground(globalWorkOffset, globalWorkSize, localWorkSize, THREAD_PRIORITY) startedTasks = startedTasks + 1 currentTaskId(i) = startedTasks Else @@ -71,6 +71,15 @@ Sub MainLoop() Wend wsAsynchronous.Cells(2, currentProgress).Interior.Color = RGB(255, 255, 255) + + For i = 1 To progDevices.Count + result = progDevices.Item(i).ProgramDevice.ReleaseMemObject(3) + result = progDevices.Item(i).ProgramDevice.ReleaseMemObject(2) + result = progDevices.Item(i).ProgramDevice.ReleaseMemObject(1) + result = progDevices.Item(i).ProgramDevice.ReleaseMemObject(0) + result = progDevices.Item(i).ProgramDevice.ReleaseKernel + result = progDevices.Item(i).ProgramDevice.ReleaseProgram + Next i End Sub Sub RunAsynchronous() @@ -124,17 +133,17 @@ Sub RunAsynchronous() ReDim currentTaskId(progDevices.Count) For i = 1 To progDevices.Count - Call progDevices.Item(i).ProgramDevice.CreateKernel("DoubleMatrixMult") - Call progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) - Call progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(1, vecM1) - Call 
progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(2, vecM2) - Call progDevices.Item(i).ProgramDevice.SetMemoryArgument_Long(3, vecQ) + result = progDevices.Item(i).ProgramDevice.CreateKernel("DoubleMatrixMult") + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(1, vecM1) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(2, vecM2) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Long(3, vecQ) Next i startedTasks = 0 ' Start execution on all found devices almost simultaneously. For i = 1 To progDevices.Count - Call progDevices.Item(i).ProgramDevice.ExecuteBackground(globalWorkOffset, globalWorkSize, localWorkSize, THREAD_PRIORITY) + result = progDevices.Item(i).ProgramDevice.ExecuteBackground(globalWorkOffset, globalWorkSize, localWorkSize, THREAD_PRIORITY) ' If you want to use callbacks, than use function below ' "CPU_Task_Completed" is a function that will obtain the callback. diff --git a/Excel/Performance.bas b/Excel/Performance.bas index 75d2373..3df0789 100644 --- a/Excel/Performance.bas +++ b/Excel/Performance.bas @@ -9,7 +9,7 @@ Sub VBA_PerformanceTest() Dim x1#(0), x2#(0), res#(0) Dim finalResults#() Dim i&, j&, k&, p&, q&, r& - Dim buildLogs$, sources$ + Dim buildLogs$, sources$, result As Boolean Dim cTime As New CTimer Dim globalWorkSize&(1), localWorkSize&(), globalWorkOffset&() Dim calcCorrect As Boolean @@ -71,26 +71,26 @@ Sub VBA_PerformanceTest() ' CPU calculations. 
Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "CPU") - Call programDevice_Performance.CreateKernel("DoubleMatrixMult") + result = programDevice_Performance.CreateKernel("DoubleMatrixMult") globalWorkSize(0) = p globalWorkSize(1) = r vecQ(0) = q - Call programDevice_Performance.SetMemoryArgument_Double(0, vecResp) - Call programDevice_Performance.SetMemoryArgument_Double(1, vecM1) - Call programDevice_Performance.SetMemoryArgument_Double(2, vecM2) - Call programDevice_Performance.SetMemoryArgument_Long(3, vecQ) + result = programDevice_Performance.SetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.SetMemoryArgument_Double(1, vecM1) + result = programDevice_Performance.SetMemoryArgument_Double(2, vecM2) + result = programDevice_Performance.SetMemoryArgument_Long(3, vecQ) ' Start once to update cashes- - Call programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) + result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) ' Start real measurements. cTime.StartCounter - Call programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) + result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) wsPerformanceTest.Cells(3, 2) = cTime.TimeElapsed - Call programDevice_Performance.GetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) finalResults = VectorToMatrix(vecResp, p, r) ' Comparison to VBA result. 
@@ -103,6 +103,13 @@ Sub VBA_PerformanceTest() Next j Next i wsPerformanceTest.Cells(3, 3) = calcCorrect + + result = programDevice_Performance.ReleaseMemObject(3) + result = programDevice_Performance.ReleaseMemObject(2) + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram Else wsPerformanceTest.Cells(3, 2) = CVErr(2042) wsPerformanceTest.Cells(3, 3) = CVErr(2042) @@ -112,27 +119,27 @@ Sub VBA_PerformanceTest() If Not (GetFirstDeviceOfType(progDevices, "GPU") Is Nothing) Then Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "GPU") - Call programDevice_Performance.CreateKernel("DoubleMatrixMult") + result = programDevice_Performance.CreateKernel("DoubleMatrixMult") globalWorkSize(0) = p globalWorkSize(1) = r vecQ(0) = q ReDim vecResp(p * r - 1) - Call programDevice_Performance.SetMemoryArgument_Double(0, vecResp) - Call programDevice_Performance.SetMemoryArgument_Double(1, vecM1) - Call programDevice_Performance.SetMemoryArgument_Double(2, vecM2) - Call programDevice_Performance.SetMemoryArgument_Long(3, vecQ) + result = programDevice_Performance.SetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.SetMemoryArgument_Double(1, vecM1) + result = programDevice_Performance.SetMemoryArgument_Double(2, vecM2) + result = programDevice_Performance.SetMemoryArgument_Long(3, vecQ) ' Start once to update cashes- Call programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) ' Start real measurements. 
cTime.StartCounter - Call programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) + result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) wsPerformanceTest.Cells(4, 2) = cTime.TimeElapsed - Call programDevice_Performance.GetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) finalResults = VectorToMatrix(vecResp, p, r) ' Comparison to VBA result. @@ -145,6 +152,12 @@ Sub VBA_PerformanceTest() Next j Next i wsPerformanceTest.Cells(4, 3) = calcCorrect + result = programDevice_Performance.ReleaseMemObject(3) + result = programDevice_Performance.ReleaseMemObject(2) + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram Else wsPerformanceTest.Cells(4, 2) = CVErr(2042) wsPerformanceTest.Cells(4, 3) = CVErr(2042) @@ -154,7 +167,7 @@ End Sub Sub GpuCpu_SingleDouble_PerformanceTest() Dim wsPerformanceTest As Worksheet Dim upper&, singles!(), doubles#(), aSingle!, aDouble#, i& - Dim sources$ + Dim sources$, result As Boolean Dim progDevices As Collection Dim programDevice_Performance As ClooWrapperVBA.ProgramDevice @@ -194,6 +207,11 @@ Sub GpuCpu_SingleDouble_PerformanceTest() wsPerformanceTest.Cells(4, 6) = GPU_PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) End If + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram + If GetFirstDeviceOfType(progDevices, "CPU") Is Nothing Then wsPerformanceTest.Cells(3, 5) = CVErr(2042) wsPerformanceTest.Cells(3, 6) = CVErr(2042) @@ -202,52 +220,57 @@ Sub GpuCpu_SingleDouble_PerformanceTest() wsPerformanceTest.Cells(3, 5) = CPU_PerformanceTest_Single(upper, singles, 
aSingle, programDevice_Performance) wsPerformanceTest.Cells(3, 6) = CPU_PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) End If + + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram End Sub ' Single precision performance at GPU. Function GPU_PerformanceTest_Single(upper&, singles!(), aSingle!, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$ + Dim buildLogs$, result As Boolean - Call programDevice_Performance.CreateKernel("SinglePerformance") + result = programDevice_Performance.CreateKernel("SinglePerformance") - Call programDevice_Performance.SetMemoryArgument_Single(0, singles) - Call programDevice_Performance.SetValueArgument_Single(1, aSingle) + result = programDevice_Performance.SetMemoryArgument_Single(0, singles) + result = programDevice_Performance.SetValueArgument_Single(1, aSingle) GPU_PerformanceTest_Single = PerformanceTestExecution(upper, programDevice_Performance) End Function ' Single precision performance at CPU. Function CPU_PerformanceTest_Single(upper&, singles!(), aSingle!, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$ + Dim buildLogs$, result As Boolean - Call programDevice_Performance.CreateKernel("SinglePerformance") + result = programDevice_Performance.CreateKernel("SinglePerformance") - Call programDevice_Performance.SetMemoryArgument_Single(0, singles) - Call programDevice_Performance.SetValueArgument_Single(1, aSingle) + result = programDevice_Performance.SetMemoryArgument_Single(0, singles) + result = programDevice_Performance.SetValueArgument_Single(1, aSingle) CPU_PerformanceTest_Single = PerformanceTestExecution(upper, programDevice_Performance) End Function ' Double precision performance at GPU. 
Function GPU_PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$ + Dim buildLogs$, result As Boolean - Call programDevice_Performance.CreateKernel("DoublePerformance") + result = programDevice_Performance.CreateKernel("DoublePerformance") - Call programDevice_Performance.SetMemoryArgument_Double(0, doubles) - Call programDevice_Performance.SetValueArgument_Double(1, aDouble) + result = programDevice_Performance.SetMemoryArgument_Double(0, doubles) + result = programDevice_Performance.SetValueArgument_Double(1, aDouble) GPU_PerformanceTest_Double = PerformanceTestExecution(upper, programDevice_Performance) End Function ' Double precision performance at CPU. Function CPU_PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$ + Dim buildLogs$, result As Boolean - Call programDevice_Performance.CreateKernel("DoublePerformance") + result = programDevice_Performance.CreateKernel("DoublePerformance") - Call programDevice_Performance.SetMemoryArgument_Double(0, doubles) - Call programDevice_Performance.SetValueArgument_Double(1, aDouble) + result = programDevice_Performance.SetMemoryArgument_Double(0, doubles) + result = programDevice_Performance.SetValueArgument_Double(1, aDouble) CPU_PerformanceTest_Double = PerformanceTestExecution(upper, programDevice_Performance) End Function diff --git a/Readme.md b/Readme.md index bf097c5..5850901 100644 --- a/Readme.md +++ b/Readme.md @@ -1,9 +1,9 @@ -# COM-wrapper of [Cloo](https://github.com/clSharp/Cloo) to start OpenCl code from Excel. -The wrapper allows to start OpenCl code on CPU and GPU devices from VBA. +# COM-wrapper of [Cloo](https://github.com/clSharp/Cloo) to execute OpenCL code from Excel. +The wrapper allows to execute OpenCL code on CPU and GPU devices from VBA. The wrapper has simple implementation and divided in two independent parts: -

ClooWrapperVBA.Configuration, to obtain configuration of available platforms and associated CPUs and GPUs.

--

ClooWrapperVBA.ProgramDevice, to compile and start OpenCl programs on CPUs and GPUs and obtain the results. It is also possible to start programs on CPUs and GPUs simultaneously (asynchronously). In asynchronous mode it is also possible to set the priority of execution.

+-

ClooWrapperVBA.ProgramDevice, to compile and start OpenCL programs on CPUs and GPUs and obtain the results. It is also possible to start programs on CPUs and GPUs simultaneously (asynchronously). In asynchronous mode it is also possible to set the priority of execution.


## Downloads @@ -27,11 +27,13 @@ The current version can be downloaded as: * ClooWrapperVBA.ProgramDevice: *

ClooWrapperVBA.ProgramDevice.Build - compiles sources for selected device.

*

ClooWrapperVBA.ProgramDevice.CreateKernel - Loads the function to execute.

- *

ClooWrapperVBA.ProgramDevice.SetValueArgument_..., ClooWrapperVBA.ProgramDevice.SetMemoryArgument_... - Sets argument values and arrays of integers, floats and doubles of the function to execute.

+ *

ClooWrapperVBA.ProgramDevice.SetValueArgument_..., ClooWrapperVBA.ProgramDevice.SetMemoryArgument_... - Sets argument values and arrays of integers, floats and doubles of the function to execute. The parameter "argument_index" starts with 0 for the first argument and must be manually increased for each following argument. It is also very important to set the arguments in the right sequence: first the argument with index 0, then the argument with index 1, and so on.

*

ClooWrapperVBA.ProgramDevice.ExecuteSync - Execute function synchronously. Excel will move further only after execution was completed.

*

ClooWrapperVBA.ProgramDevice.ExecuteAsync - Start execution of the function asynchronously. The callback function will be called at the end of execution.

*

ClooWrapperVBA.ProgramDevice.ExecuteBackground - Start execution of the function asynchronously. After execution the flag "ClooWrapperVBA.ProgramDevice.ExecutionCompleted" is set to true.

*

ClooWrapperVBA.ProgramDevice.GetMemoryArgument_... - Read arguments (results) from the function.

+ *

ClooWrapperVBA.ProgramDevice.ReleaseMemObject - Releases an instantiated memory object. Its single parameter has the same meaning as "argument_index" in the SetValueArgument_... / SetMemoryArgument_... functions. Release the objects starting with the highest used "argument_index".

+ *

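ClooWrapperVBA.ProgramDevice.ReleaseKernel and ClooWrapperVBA.ProgramDevice.ReleaseProgram dispose of the remaining instantiated OpenCL objects: ReleaseKernel disposes the kernel, and ReleaseProgram disposes the program, the command queue and the context.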


## VBA samples. @@ -60,11 +62,21 @@ The current version can be downloaded as: - "deviceTypeIndex" is an index inside of same device type. -

Example: If your platform have 3 devices, one CPU and two GPUs, then the possible "deviceIndex" values are 0 (GPU), 1 (CPU) and 2 (GPU). The "deviceTypeIndex" in this configuration will be 0 and 1 for GPUs and 0 for CPU. You can obtain the sequence of devices using the Configuration.

-

To simplify usage from VBA, all devices can be added to the collection using function "CreateDeviceCollection". You can obtain the first CPU and first GPU using a function "GetFirstDeviceOfType" where the first argument is a collection of devices and the second argument is a device type, "CPU" or "GPU". The collection of all available devices is also very useful to run your code in asynchronous mode at all available devices.

-3. **ExecuteAsync** function must be used with care: +3. **Build**: Parameter "options" contain compiler options. In simplest case it can be empty ("", not "null" or "Nothing"). Among the common compiler oprions, like "-w" (inhibit all warning messages), you can also define here commonly used constants ("-D name=definition") and use them in the OpenCL code. The complete list of compiler options can be found at [official Khronos home page](https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clBuildProgram.html). +4. **ExecuteAsync** function must be used with care: -

During debugging Excel can crash because of simultaneous execution of the code in callback and "MainLoop" functions.

-

Writing out of the results to the cells in callback function can also cause an Excel crash.

- A good solution is to use instead **ExecuteBackground** function. +5. ReleaseMemObject, ReleaseKernel and ReleaseProgram are added to accurately dispose instantiated OpenCL objects and to avoid side effects from not disposed objects. Nevertheless, the current code in Excel example works correctly also without them.
## Not tested parts: --

globalWorkOffset, localWorkSize were not tested and were added analogously to globalWorkSize.

\ No newline at end of file +-

globalWorkOffset, localWorkSize were not tested and were added analogously to globalWorkSize.

+ +## FAQ: +1. Configuration: No platforms/devices were found. + - Reason: OpenCL.dll is not found in "Windows" folder. + - Solution: Get OpenCL.dll from other computer. + - Reason: The GPGPU / CPU drivers are too old and not supported by OpenCL. + - Update the drivers. +
\ No newline at end of file