diff --git a/C#/Installation script.iss b/C#/Installation script.iss index 4ba41b2..753032f 100644 --- a/C#/Installation script.iss +++ b/C#/Installation script.iss @@ -3,7 +3,7 @@ AppName=ClooWrapperVBA AppVerName=ClooWrapperVBA DefaultDirName={pf}\ClooWrapperVBA DefaultGroupName=ClooWrapperVBA -Compression=lzma +Compression=zip SolidCompression=yes SourceDir=.\ PrivilegesRequired=poweruser @@ -22,9 +22,11 @@ Name: "{app}\demo\cl"; Permissions: everyone-full Source: bin\ClooWrapperVBA.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\ClooWrapperVBA_x64.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\Cloo.dll; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; -Source: ..\Excel\OpenCl v0.05.xlsm; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; -Source: ..\Excel\cl\Performance.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; -Source: ..\Excel\cl\MatrixMultiplication.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\OpenCl example.xlsm; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\cl\FloatPerformance.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\cl\DoublePerformance.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\cl\FloatMatrixMultiplication.cl; DestDir: {app}\demo\cl; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; +Source: ..\Excel\cl\DoubleMatrixMultiplication.cl; DestDir: {app}\demo\cl; Flags: ignoreversion 
recursesubdirs overwritereadonly; Permissions: everyone-full; Source: ..\Excel\Configuration.vbs; DestDir: {app}\demo; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\register.bat; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; Source: bin\unregister.bat; DestDir: {app}; Flags: ignoreversion recursesubdirs overwritereadonly; Permissions: everyone-full; diff --git a/Excel/Asynchronous.bas b/Excel/Asynchronous.bas index d565543..1f323bb 100644 --- a/Excel/Asynchronous.bas +++ b/Excel/Asynchronous.bas @@ -10,7 +10,7 @@ Const THREAD_PRIORITY = 0 Dim progDevices As Collection Dim currentTaskId&() -Dim vecResp#() +Dim vecResp!() Dim wsAsynchronous As Worksheet Dim globalWorkSize&(1), localWorkSize&(), globalWorkOffset&() Dim logLine% @@ -26,7 +26,7 @@ Sub MainLoop() While Not allTasks_Completed For i = 1 To progDevices.Count If progDevices.Item(i).ProgramDevice.ExecutionCompleted Then - result = progDevices.Item(i).ProgramDevice.GetMemoryArgument_Double(0, vecResp) ' Extract the results and do something with received data here. + result = progDevices.Item(i).ProgramDevice.GetMemoryArgument_Single(0, vecResp) ' Extract the results and do something with received data here. wsAsynchronous.Cells(logLine, 1) = "Task " & currentTaskId(i) & ", " & progDevices.Item(i).ProgramDevice.deviceType & _ progDevices.Item(i).DeviceId & ": completed" @@ -37,7 +37,7 @@ Sub MainLoop() ' Start new task If startedTasks < MAX_TASKS Then ReDim vecResp(UBound(vecResp)) ' Erase output vector. - result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Single(0, vecResp) ' If you want to use callbacks, than use function below ' "CPU_Task_Completed" is a function that will obtain the callback. 
@@ -83,14 +83,14 @@ Sub MainLoop() End Sub Sub RunAsynchronous() - Dim vecM1#(), vecM2#() + Dim vecM1!(), vecM2!() Dim vecQ&(1) Dim i&, j&, p&, q&, r&, nRows& Dim buildLogs$, sources$ Set wsAsynchronous = ThisWorkbook.Worksheets("Asynchronous") - Open Application.ActiveWorkbook.Path & "\cl\MatrixMultiplication.cl" For Binary As #1 + Open Application.ActiveWorkbook.Path & "\cl\FloatMatrixMultiplication.cl" For Binary As #1 sources = Space$(LOF(1)) Get #1, , sources Close #1 @@ -133,10 +133,10 @@ Sub RunAsynchronous() ReDim currentTaskId(progDevices.Count) For i = 1 To progDevices.Count - result = progDevices.Item(i).ProgramDevice.CreateKernel("DoubleMatrixMult") - result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(0, vecResp) - result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(1, vecM1) - result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Double(2, vecM2) + result = progDevices.Item(i).ProgramDevice.CreateKernel("FloatMatrixMult") + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Single(0, vecResp) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Single(1, vecM1) + result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Single(2, vecM2) result = progDevices.Item(i).ProgramDevice.SetMemoryArgument_Long(3, vecQ) Next i diff --git a/Excel/HelloWorld.bas b/Excel/HelloWorld.bas index b2ec9bd..84c3eaf 100644 --- a/Excel/HelloWorld.bas +++ b/Excel/HelloWorld.bas @@ -6,10 +6,10 @@ Sub HelloWorld() Dim nRows%, currentRow&, nPlatforms&, nDevices&, i&, j&, result As Boolean Dim deviceType$, platformName$, platformVendor$, platformVersion$, deviceVendor$, deviceVersion$, driverVersion$, openCLCVersionString$ Dim maxComputeUnits&, globalMemorySize#, maxClockFrequency#, maxMemoryAllocationSize#, deviceName$, sources$, cpuCounter&, gpuCounter& - Dim buildLogs$, platformId&, deviceId&, errorString$ + Dim buildLogs$, platformId&, DeviceId&, errorString$ Dim deviceAvailable As Boolean, compilerAvailable As 
Boolean - Dim m1#(1, 1), m2#(1, 1), vecM1#(), vecM2#(), vecQ&(0), vecResp#(3), globalWorkOffset&(), globalWorkSize&(1), localWorkSize&() - Dim p&, q&, r&, resp#() + Dim m1!(1, 1), m2!(1, 1), vecM1!(), vecM2!(), vecQ&(0), vecResp!(3), globalWorkOffset&(), globalWorkSize&(1), localWorkSize&() + Dim p&, q&, r&, resp!() Dim clooConfiguration As New ClooWrapperVBA.Configuration Dim progDevice As ClooWrapperVBA.ProgramDevice @@ -76,7 +76,7 @@ Sub HelloWorld() ' Multiplication of two matrices. ' Read the OpenCL sources. - Open Application.ActiveWorkbook.Path & "\cl\MatrixMultiplication.cl" For Binary As #1 + Open Application.ActiveWorkbook.Path & "\cl\FloatMatrixMultiplication.cl" For Binary As #1 sources = Space$(LOF(1)) Get #1, , sources Close #1 @@ -87,13 +87,13 @@ Sub HelloWorld() result = clooConfiguration.SetPlatform(platformId) cpuCounter = 0 gpuCounter = 0 - For deviceId = 0 To clooConfiguration.Platform.Devices - 1 - result = clooConfiguration.Platform.SetDevice(deviceId) + For DeviceId = 0 To clooConfiguration.Platform.Devices - 1 + result = clooConfiguration.Platform.SetDevice(DeviceId) If clooConfiguration.Platform.device.compilerAvailable Then If clooConfiguration.Platform.device.deviceType = "CPU" Then Set progDevice = New ClooWrapperVBA.ProgramDevice - result = progDevice.Build(sources, "", platformId, deviceId, cpuCounter, buildLogs) + result = progDevice.Build(sources, "", platformId, DeviceId, cpuCounter, buildLogs) If result Then Exit Do Else @@ -102,7 +102,7 @@ Sub HelloWorld() End If If clooConfiguration.Platform.device.deviceType = "GPU" Then Set progDevice = New ClooWrapperVBA.ProgramDevice - result = progDevice.Build(sources, "", platformId, deviceId, gpuCounter, buildLogs) + result = progDevice.Build(sources, "", platformId, DeviceId, gpuCounter, buildLogs) gpuCounter = gpuCounter + 1 If result Then Exit Do @@ -111,12 +111,12 @@ Sub HelloWorld() End If End If End If - Next deviceId + Next DeviceId platformId = platformId + 1 Loop errorString = 
progDevice.errorString - result = progDevice.CreateKernel("DoubleMatrixMult") + result = progDevice.CreateKernel("FloatMatrixMult") ' Initialization of arrays: p = 2: q = 2: r = 2 @@ -125,18 +125,18 @@ Sub HelloWorld() m1(i, j) = wsHelloWorld.Cells(i + 1, j + 7) Next j Next i - vecM1 = MatrixToVector(m1, p, q) + vecM1 = MatrixToVectorSingle(m1, p, q) For i = 0 To q - 1 For j = 0 To r - 1 m2(i, j) = wsHelloWorld.Cells(i + 3, j + 7) Next j Next i - vecM2 = MatrixToVector(m2, q, r) + vecM2 = MatrixToVectorSingle(m2, q, r) vecQ(0) = q - result = progDevice.SetMemoryArgument_Double(0, vecResp) - result = progDevice.SetMemoryArgument_Double(1, vecM1) - result = progDevice.SetMemoryArgument_Double(2, vecM2) + result = progDevice.SetMemoryArgument_Single(0, vecResp) + result = progDevice.SetMemoryArgument_Single(1, vecM1) + result = progDevice.SetMemoryArgument_Single(2, vecM2) result = progDevice.SetMemoryArgument_Long(3, vecQ) globalWorkSize(0) = p @@ -144,9 +144,9 @@ Sub HelloWorld() result = progDevice.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) - result = progDevice.GetMemoryArgument_Double(0, vecResp) + result = progDevice.GetMemoryArgument_Single(0, vecResp) - resp = VectorToMatrix(vecResp, p, r) + resp = VectorToMatrixSingle(vecResp, p, r) For i = 0 To p - 1 For j = 0 To r - 1 diff --git a/Excel/Helpers.bas b/Excel/Helpers.bas index 83e3420..7d66959 100644 --- a/Excel/Helpers.bas +++ b/Excel/Helpers.bas @@ -37,12 +37,12 @@ Function CreateDeviceCollection(sources$) If clooConfiguration.Platform.device.deviceType = "CPU" Then result = progDevice.ProgramDevice.Build(sources, "", i - 1, j - 1, cpuCounter, buildLogs) - progDevice.deviceId = cpuCounter + progDevice.DeviceId = cpuCounter progDevice.deviceType = "CPU" If result = True Then cpuCounter = cpuCounter + 1 ElseIf clooConfiguration.Platform.device.deviceType = "GPU" Then result = progDevice.ProgramDevice.Build(sources, "", i - 1, j - 1, gpuCounter, buildLogs) - progDevice.deviceId = gpuCounter + 
progDevice.DeviceId = gpuCounter progDevice.deviceType = "GPU" If result = True Then gpuCounter = gpuCounter + 1 Else @@ -62,7 +62,37 @@ Function CreateDeviceCollection(sources$) End If End Function -Function MatrixToVector(m() As Double, maxi As Long, maxj As Long) As Double() +Function MatrixToVectorSingle(m() As Single, maxi As Long, maxj As Long) As Single() + Dim v() As Single + Dim i&, j& + + ReDim v(maxi * maxj - 1) + + For i = 0 To maxi - 1 + For j = 0 To maxj - 1 + v(i + maxi * j) = m(i, j) + Next j + Next i + + MatrixToVectorSingle = v +End Function + +Function VectorToMatrixSingle(v() As Single, maxi As Long, maxj As Long) As Single() + Dim i&, j& + Dim m() As Single + + ReDim m(maxi - 1, maxj - 1) + + For i = 0 To maxi - 1 + For j = 0 To maxj - 1 + m(i, j) = v(i + maxi * j) + Next j + Next i + + VectorToMatrixSingle = m +End Function + +Function MatrixToVectorDouble(m() As Double, maxi As Long, maxj As Long) As Double() Dim v() As Double Dim i&, j& @@ -74,10 +104,10 @@ Function MatrixToVector(m() As Double, maxi As Long, maxj As Long) As Double() Next j Next i - MatrixToVector = v + MatrixToVectorDouble = v End Function -Function VectorToMatrix(v() As Double, maxi As Long, maxj As Long) As Double() +Function VectorToMatrixDouble(v() As Double, maxi As Long, maxj As Long) As Double() Dim i&, j& Dim m() As Double @@ -89,5 +119,5 @@ Function VectorToMatrix(v() As Double, maxi As Long, maxj As Long) As Double() Next j Next i - VectorToMatrix = m + VectorToMatrixDouble = m End Function diff --git a/Excel/OpenCl example.xlsm b/Excel/OpenCl example.xlsm index 2bf08ac..447c252 100644 Binary files a/Excel/OpenCl example.xlsm and b/Excel/OpenCl example.xlsm differ diff --git a/Excel/Performance.bas b/Excel/Performance.bas index 8eeba67..e6766b9 100644 --- a/Excel/Performance.bas +++ b/Excel/Performance.bas @@ -1,13 +1,13 @@ Attribute VB_Name = "Performance" Option Explicit -Private Const ARRAY_SIZE = 1200 +Private Const ARRAY_SIZE = 1000 Sub VBA_PerformanceTest() 
Dim wsPerformanceTest As Worksheet - Dim m1#(), m2#(), vecM1#(), vecM2#(), vecResp#(), resultVba#(), vecQ&(0) - Dim x1#(0), x2#(0), res#(0) - Dim finalResults#() + Dim m1!(), m2!(), vecM1!(), vecM2!(), vecResp!(), resultVba!(), vecQ&(0) + Dim x1!(0), x2!(0), res!(0) + Dim finalResults!() Dim i&, j&, k&, p&, q&, r& Dim buildLogs$, sources$, result As Boolean Dim cTime As New CTimer @@ -31,17 +31,17 @@ Sub VBA_PerformanceTest() Randomize For i = 0 To p - 1 For j = 0 To q - 1 - m1(i, j) = (Rnd() - 0.5) * 10# + m1(i, j) = CInt((Rnd() - 0.5) * 100#) Next j Next i For i = 0 To q - 1 For j = 0 To r - 1 - m2(i, j) = (Rnd() - 0.5) * 10# + m2(i, j) = CInt((Rnd() - 0.5) * 100#) Next j Next i - vecM1 = MatrixToVector(m1, p, q) - vecM2 = MatrixToVector(m2, q, r) + vecM1 = MatrixToVectorSingle(m1, p, q) + vecM2 = MatrixToVectorSingle(m2, q, r) ' VBA matrix multiplication: cTime.StartCounter @@ -54,7 +54,7 @@ Sub VBA_PerformanceTest() Next i wsPerformanceTest.Cells(2, 2) = cTime.TimeElapsed - Open Application.ActiveWorkbook.Path & "\cl\MatrixMultiplication.cl" For Binary As #1 + Open Application.ActiveWorkbook.Path & "\cl\FloatMatrixMultiplication.cl" For Binary As #1 sources = Space$(LOF(1)) Get #1, , sources Close #1 @@ -71,15 +71,15 @@ Sub VBA_PerformanceTest() ' CPU calculations. 
Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "CPU") - result = programDevice_Performance.CreateKernel("DoubleMatrixMult") + result = programDevice_Performance.CreateKernel("FloatMatrixMult") globalWorkSize(0) = p globalWorkSize(1) = r vecQ(0) = q - result = programDevice_Performance.SetMemoryArgument_Double(0, vecResp) - result = programDevice_Performance.SetMemoryArgument_Double(1, vecM1) - result = programDevice_Performance.SetMemoryArgument_Double(2, vecM2) + result = programDevice_Performance.SetMemoryArgument_Single(0, vecResp) + result = programDevice_Performance.SetMemoryArgument_Single(1, vecM1) + result = programDevice_Performance.SetMemoryArgument_Single(2, vecM2) result = programDevice_Performance.SetMemoryArgument_Long(3, vecQ) ' Start once to update cashes. @@ -90,8 +90,8 @@ Sub VBA_PerformanceTest() result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) wsPerformanceTest.Cells(3, 2) = cTime.TimeElapsed - result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) - finalResults = VectorToMatrix(vecResp, p, r) + result = programDevice_Performance.GetMemoryArgument_Single(0, vecResp) + finalResults = VectorToMatrixSingle(vecResp, p, r) ' Comparison to VBA result. 
calcCorrect = True @@ -119,16 +119,16 @@ Sub VBA_PerformanceTest() If Not (GetFirstDeviceOfType(progDevices, "GPU") Is Nothing) Then Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "GPU") - result = programDevice_Performance.CreateKernel("DoubleMatrixMult") + result = programDevice_Performance.CreateKernel("FloatMatrixMult") globalWorkSize(0) = p globalWorkSize(1) = r vecQ(0) = q ReDim vecResp(p * r - 1) - result = programDevice_Performance.SetMemoryArgument_Double(0, vecResp) - result = programDevice_Performance.SetMemoryArgument_Double(1, vecM1) - result = programDevice_Performance.SetMemoryArgument_Double(2, vecM2) + result = programDevice_Performance.SetMemoryArgument_Single(0, vecResp) + result = programDevice_Performance.SetMemoryArgument_Single(1, vecM1) + result = programDevice_Performance.SetMemoryArgument_Single(2, vecM2) result = programDevice_Performance.SetMemoryArgument_Long(3, vecQ) ' Start once to update cashes. @@ -139,8 +139,8 @@ Sub VBA_PerformanceTest() result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) wsPerformanceTest.Cells(4, 2) = cTime.TimeElapsed - result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) - finalResults = VectorToMatrix(vecResp, p, r) + result = programDevice_Performance.GetMemoryArgument_Single(0, vecResp) + finalResults = VectorToMatrixSingle(vecResp, p, r) ' Comparison to VBA result. 
calcCorrect = True @@ -164,28 +164,30 @@ Sub VBA_PerformanceTest() End If End Sub -Sub GpuCpu_SingleDouble_PerformanceTest() +Sub GpuCpu_FloatDouble_PerformanceTest() + GpuCpu_Float_PerformanceTest + GpuCpu_Double_PerformanceTest +End Sub + +Sub GpuCpu_Float_PerformanceTest() Dim wsPerformanceTest As Worksheet - Dim upper&, singles!(), doubles#(), aSingle!, aDouble#, i& + Dim upper&, singles!(), aSingle!, i& Dim sources$, result As Boolean Dim progDevices As Collection Dim programDevice_Performance As ClooWrapperVBA.ProgramDevice Set wsPerformanceTest = ThisWorkbook.Worksheets("Performance") - wsPerformanceTest.Range("E3:F4").ClearContents + wsPerformanceTest.Range("E3:E4").ClearContents upper = 10000000 ReDim singles(upper) - ReDim doubles(upper) For i = 0 To upper - 1 singles(i) = i - doubles(i) = i Next i aSingle = 2! - aDouble = 2# - Open Application.ActiveWorkbook.Path & "\cl\Performance.cl" For Binary As #1 + Open Application.ActiveWorkbook.Path & "\cl\FloatPerformance.cl" For Binary As #1 sources = Space$(LOF(1)) Get #1, , sources Close #1 @@ -200,11 +202,9 @@ Sub GpuCpu_SingleDouble_PerformanceTest() If GetFirstDeviceOfType(progDevices, "GPU") Is Nothing Then wsPerformanceTest.Cells(4, 5) = CVErr(2042) - wsPerformanceTest.Cells(4, 6) = CVErr(2042) Else Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "GPU") - wsPerformanceTest.Cells(4, 5) = GPU_PerformanceTest_Single(upper, singles, aSingle, programDevice_Performance) - wsPerformanceTest.Cells(4, 6) = GPU_PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) + wsPerformanceTest.Cells(4, 5) = PerformanceTest_Single(upper, singles, aSingle, programDevice_Performance) result = programDevice_Performance.ReleaseMemObject(1) result = programDevice_Performance.ReleaseMemObject(0) @@ -214,11 +214,9 @@ Sub GpuCpu_SingleDouble_PerformanceTest() If GetFirstDeviceOfType(progDevices, "CPU") Is Nothing Then wsPerformanceTest.Cells(3, 5) = CVErr(2042) - wsPerformanceTest.Cells(3, 6) = 
CVErr(2042) Else Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "CPU") - wsPerformanceTest.Cells(3, 5) = CPU_PerformanceTest_Single(upper, singles, aSingle, programDevice_Performance) - wsPerformanceTest.Cells(3, 6) = CPU_PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) + wsPerformanceTest.Cells(3, 5) = PerformanceTest_Single(upper, singles, aSingle, programDevice_Performance) result = programDevice_Performance.ReleaseMemObject(1) result = programDevice_Performance.ReleaseMemObject(0) @@ -227,44 +225,76 @@ Sub GpuCpu_SingleDouble_PerformanceTest() End If End Sub -' Single precision performance at GPU. -Function GPU_PerformanceTest_Single(upper&, singles!(), aSingle!, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$, result As Boolean +Sub GpuCpu_Double_PerformanceTest() + Dim wsPerformanceTest As Worksheet + Dim upper&, doubles#(), aDouble#, i& + Dim sources$, result As Boolean + Dim progDevices As Collection + Dim programDevice_Performance As ClooWrapperVBA.ProgramDevice - result = programDevice_Performance.CreateKernel("SinglePerformance") + Set wsPerformanceTest = ThisWorkbook.Worksheets("Performance") + wsPerformanceTest.Range("F3:F4").ClearContents - result = programDevice_Performance.SetMemoryArgument_Single(0, singles) - result = programDevice_Performance.SetValueArgument_Single(1, aSingle) + upper = 10000000 + ReDim doubles(upper) - GPU_PerformanceTest_Single = PerformanceTestExecution(upper, programDevice_Performance) -End Function - -' Single precision performance at CPU. 
-Function CPU_PerformanceTest_Single(upper&, singles!(), aSingle!, programDevice_Performance As ClooWrapperVBA.ProgramDevice) - Dim buildLogs$, result As Boolean + For i = 0 To upper - 1 + doubles(i) = i + Next i + aDouble = 2# - result = programDevice_Performance.CreateKernel("SinglePerformance") + Open Application.ActiveWorkbook.Path & "\cl\DoublePerformance.cl" For Binary As #1 + sources = Space$(LOF(1)) + Get #1, , sources + Close #1 - result = programDevice_Performance.SetMemoryArgument_Single(0, singles) - result = programDevice_Performance.SetValueArgument_Single(1, aSingle) + ' Adding of all CPU and GPU devices to collection. + Set progDevices = CreateDeviceCollection(sources) - CPU_PerformanceTest_Single = PerformanceTestExecution(upper, programDevice_Performance) -End Function + If progDevices Is Nothing Then + MsgBox ("No devices found! Something is wrong!") + Exit Sub + End If + + If GetFirstDeviceOfType(progDevices, "GPU") Is Nothing Then + wsPerformanceTest.Cells(4, 6) = CVErr(2042) + Else + Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "GPU") + wsPerformanceTest.Cells(4, 6) = PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) + + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram + End If + + If GetFirstDeviceOfType(progDevices, "CPU") Is Nothing Then + wsPerformanceTest.Cells(3, 6) = CVErr(2042) + Else + Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "CPU") + wsPerformanceTest.Cells(3, 6) = PerformanceTest_Double(upper, doubles, aDouble, programDevice_Performance) + + result = programDevice_Performance.ReleaseMemObject(1) + result = programDevice_Performance.ReleaseMemObject(0) + result = programDevice_Performance.ReleaseKernel + result = programDevice_Performance.ReleaseProgram + End If +End Sub -' Double precision 
performance at GPU. -Function GPU_PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_Performance As ClooWrapperVBA.ProgramDevice) +' Single precision performance at CPU / GPU. +Function PerformanceTest_Single(upper&, singles!(), aSingle!, programDevice_Performance As ClooWrapperVBA.ProgramDevice) Dim buildLogs$, result As Boolean - result = programDevice_Performance.CreateKernel("DoublePerformance") + result = programDevice_Performance.CreateKernel("FloatPerformance") - result = programDevice_Performance.SetMemoryArgument_Double(0, doubles) - result = programDevice_Performance.SetValueArgument_Double(1, aDouble) + result = programDevice_Performance.SetMemoryArgument_Single(0, singles) + result = programDevice_Performance.SetValueArgument_Single(1, aSingle) - GPU_PerformanceTest_Double = PerformanceTestExecution(upper, programDevice_Performance) + PerformanceTest_Single = PerformanceTestExecution(upper, programDevice_Performance) End Function -' Double precision performance at CPU. -Function CPU_PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_Performance As ClooWrapperVBA.ProgramDevice) +' Double precision performance at CPU / GPU. 
+Function PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_Performance As ClooWrapperVBA.ProgramDevice) Dim buildLogs$, result As Boolean result = programDevice_Performance.CreateKernel("DoublePerformance") @@ -272,7 +302,7 @@ Function CPU_PerformanceTest_Double(upper&, doubles#(), aDouble#, programDevice_ result = programDevice_Performance.SetMemoryArgument_Double(0, doubles) result = programDevice_Performance.SetValueArgument_Double(1, aDouble) - CPU_PerformanceTest_Double = PerformanceTestExecution(upper, programDevice_Performance) + PerformanceTest_Double = PerformanceTestExecution(upper, programDevice_Performance) End Function Function PerformanceTestExecution(upper&, programDevice_Performance) @@ -305,9 +335,9 @@ Function PerformanceTestExecution(upper&, programDevice_Performance) End Function Sub Test_OneAfterAnother() - Dim m1#(), m2#(), vecM1#(), vecM2#(), vecResp#(), resultVba#(), vecQ&(0) - Dim x1#(0), x2#(0), res#(0) - Dim finalResults#() + Dim m1!(), m2!(), vecM1!(), vecM2!(), vecResp!(), resultVba!(), vecQ&(0) + Dim x1!(0), x2!(0), res!(0) + Dim finalResults!() Dim i&, j&, k&, p&, q&, r& Dim buildLogs$, sources$, result As Boolean Dim cTime As New CTimer @@ -340,7 +370,7 @@ Sub Test_OneAfterAnother() Next j Next i - Open Application.ActiveWorkbook.Path & "\cl\MatrixMultiplication.cl" For Binary As #1 + Open Application.ActiveWorkbook.Path & "\cl\FloatMatrixMultiplication.cl" For Binary As #1 sources = Space$(LOF(1)) Get #1, , sources Close #1 @@ -357,22 +387,22 @@ Sub Test_OneAfterAnother() ' CPU calculations. 
Set programDevice_Performance = GetFirstDeviceOfType(progDevices, "CPU") - result = programDevice_Performance.CreateKernel("DoubleMatrixMult") + result = programDevice_Performance.CreateKernel("FloatMatrixMult") globalWorkSize(0) = p globalWorkSize(1) = r vecQ(0) = q - result = programDevice_Performance.SetMemoryArgument_Double(0, vecResp) - result = programDevice_Performance.SetMemoryArgument_Double(1, vecM1) - result = programDevice_Performance.SetMemoryArgument_Double(2, vecM2) + result = programDevice_Performance.SetMemoryArgument_Single(0, vecResp) + result = programDevice_Performance.SetMemoryArgument_Single(1, vecM1) + result = programDevice_Performance.SetMemoryArgument_Single(2, vecM2) result = programDevice_Performance.SetMemoryArgument_Long(3, vecQ) result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) - result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.GetMemoryArgument_Single(0, vecResp) - finalResults = VectorToMatrix(vecResp, p, r) + finalResults = VectorToMatrixSingle(vecResp, p, r) - + ' Comparison to VBA result. 
calcCorrect = True For i = 0 To p - 1 @@ -383,12 +413,11 @@ Sub Test_OneAfterAnother() Next j Next i - - result = programDevice_Performance.SetMemoryArgument_Double(1, vecM2) - result = programDevice_Performance.SetMemoryArgument_Double(2, vecM1) + result = programDevice_Performance.SetMemoryArgument_Single(1, vecM2) + result = programDevice_Performance.SetMemoryArgument_Single(2, vecM1) result = programDevice_Performance.ExecuteSync(globalWorkOffset, globalWorkSize, localWorkSize) - result = programDevice_Performance.GetMemoryArgument_Double(0, vecResp) + result = programDevice_Performance.GetMemoryArgument_Single(0, vecResp) - finalResults = VectorToMatrix(vecResp, p, r) + finalResults = VectorToMatrixSingle(vecResp, p, r) ' VBA matrix multiplication: diff --git a/Excel/cl/DoubleMatrixMultiplication.cl b/Excel/cl/DoubleMatrixMultiplication.cl new file mode 100644 index 0000000..ebd9435 --- /dev/null +++ b/Excel/cl/DoubleMatrixMultiplication.cl @@ -0,0 +1,15 @@ +__kernel void +DoubleMatrixMult(__global double* MResp, __global double* M1, __global double* M2, __global int* q) +{ + // Vector element index + int i = get_global_id(0); + int j = get_global_id(1); + int p = get_global_size(0); + int r = get_global_size(1); + MResp[i + p * j] = 0; + int QQ = q[0]; + for (int k = 0; k < QQ; k++) + { + MResp[i + p * j] += M1[i + p * k] * M2[k + QQ * j]; + } +} \ No newline at end of file diff --git a/Excel/cl/Performance.cl b/Excel/cl/DoublePerformance.cl similarity index 83% rename from Excel/cl/Performance.cl rename to Excel/cl/DoublePerformance.cl index ac50347..fba72bc 100644 --- a/Excel/cl/Performance.cl +++ b/Excel/cl/DoublePerformance.cl @@ -15,16 +15,6 @@ #define MAD_1024(x,y) MAD_256(x, y); MAD_256(x, y); MAD_256(x, y); MAD_256(x, y); #define MAD_4096(x,y) MAD_1024(x, y); MAD_1024(x, y); MAD_1024(x, y); MAD_1024(x, y); -__kernel void SinglePerformance(__global float* ptr, float _A) { - float x = _A; - float y = (float)get_local_id(0); - - MAD_1024(x, y); - MAD_1024(x, y); - - ptr[get_global_id(0)] = y; -} - __kernel 
void DoublePerformance(__global double* ptr, double _A) { double x = _A; double y = (double)get_local_id(0); diff --git a/Excel/cl/FloatMatrixMultiplication.cl b/Excel/cl/FloatMatrixMultiplication.cl new file mode 100644 index 0000000..33638f9 --- /dev/null +++ b/Excel/cl/FloatMatrixMultiplication.cl @@ -0,0 +1,15 @@ +__kernel void +FloatMatrixMult(__global float* MResp, __global float* M1, __global float* M2, __global int* q) +{ + // Vector element index + int i = get_global_id(0); + int j = get_global_id(1); + int p = get_global_size(0); + int r = get_global_size(1); + MResp[i + p * j] = 0; + int QQ = q[0]; + for (int k = 0; k < QQ; k++) + { + MResp[i + p * j] += M1[i + p * k] * M2[k + QQ * j]; + } +} \ No newline at end of file diff --git a/Excel/cl/FloatPerformance.cl b/Excel/cl/FloatPerformance.cl new file mode 100644 index 0000000..60b55f4 --- /dev/null +++ b/Excel/cl/FloatPerformance.cl @@ -0,0 +1,20 @@ +#undef MAD_4 +#undef MAD_16 +#undef MAD_64 + +#define MAD_4(x, y) x = y*x+y; y = x*y+x; x = y*x+y; y = x*y+x; +#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); +#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); +#define MAD_256(x,y) MAD_64(x, y); MAD_64(x, y); MAD_64(x, y); MAD_64(x, y); +#define MAD_1024(x,y) MAD_256(x, y); MAD_256(x, y); MAD_256(x, y); MAD_256(x, y); +#define MAD_4096(x,y) MAD_1024(x, y); MAD_1024(x, y); MAD_1024(x, y); MAD_1024(x, y); + +__kernel void FloatPerformance(__global float* ptr, float _A) { + float x = _A; + float y = (float)get_local_id(0); + + MAD_1024(x, y); + MAD_1024(x, y); + + ptr[get_global_id(0)] = y; +} \ No newline at end of file diff --git a/Excel/cl/MatrixMultiplication.cl b/Excel/cl/MatrixMultiplication.cl deleted file mode 100644 index 3ff6e81..0000000 --- a/Excel/cl/MatrixMultiplication.cl +++ /dev/null @@ -1,31 +0,0 @@ -__kernel void -FloatMatrixMult(__global float* MResp, __global float* M1, __global float* M2, __global int* q) -{ - // Vector element index - int 
i = get_global_id(0); - int j = get_global_id(1); - int p = get_global_size(0); - int r = get_global_size(1); - MResp[i + p * j] = 0; - int QQ = q[0]; - for (int k = 0; k < QQ; k++) - { - MResp[i + p * j] += M1[i + p * k] * M2[k + QQ * j]; - } -} - -__kernel void -DoubleMatrixMult(__global double* MResp, __global double* M1, __global double* M2, __global int* q) -{ - // Vector element index - int i = get_global_id(0); - int j = get_global_id(1); - int p = get_global_size(0); - int r = get_global_size(1); - MResp[i + p * j] = 0; - int QQ = q[0]; - for (int k = 0; k < QQ; k++) - { - MResp[i + p * j] += M1[i + p * k] * M2[k + QQ * j]; - } -} \ No newline at end of file diff --git a/Readme.md b/Readme.md index c516bfb..a812c66 100644 --- a/Readme.md +++ b/Readme.md @@ -82,5 +82,4 @@ The current version can be downloaded as: - Solution: Get OpenCL.dll from other computer. - Reason: The GPGPU / CPU drivers are too old and not supported by OpenCL. - Update the drivers. - - Regarding Xeon processors, it is recommended that you install the latest drivers, as detailed in the article "[OpenCL™ Drivers and Runtimes for Intel® Architecture](https://www.codeproject.com/Articles/1230011/OpenCL-Drivers-and-Runtimes-for-Intel-Architectu)".