Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenCL based stripped prefix-sum blur #12

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified metal/.DS_Store
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@
<objects>
<viewController id="XfG-lQ-9wD" sceneMemberID="viewController">
<view key="view" id="m2S-Jp-Qdl" customClass="MultiscaleView" customModule="multiscale_turing_patterns">
<rect key="frame" x="0.0" y="0.0" width="1024" height="1024"/>
<rect key="frame" x="0.0" y="0.0" width="512" height="512"/>
<autoresizingMask key="autoresizingMask"/>
</view>
</viewController>
Expand Down
8 changes: 4 additions & 4 deletions metal/multiscale-turing-patterns/MultiscaleView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,10 @@ class MultiscaleView: MTKView {
renderState()

// if frameCount > 10 {
let filePath = "/Users/elias.jordan/Desktop/render/frame-" + String(format: "%04d", frameCount) + ".png"
let url = URL(fileURLWithPath: filePath)
let tex = currentDrawable!.texture
writeTexture(tex, url: url)
// let filePath = "/Users/elias.jordan/Desktop/render/frame-" + String(format: "%04d", frameCount) + ".png"
// let url = URL(fileURLWithPath: filePath)
// let tex = currentDrawable!.texture
// writeTexture(tex, url: url)
// }

frameCount += 1
Expand Down
17 changes: 12 additions & 5 deletions processing-app/build.sbt
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@

ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / organization := "io.elijordan"
ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / organization := "io.elijordan"

lazy val root = (project in file("."))
.settings(
name := "processing-app",
libraryDependencies ++= Seq(
"org.processing" % "core" % "3.3.7",
"com.github.wendykierp" % "JTransforms" % "3.1"
)
"org.jogamp.jocl" % "jocl-main" % "2.3.2",
"com.github.wendykierp" % "JTransforms" % "3.1",
"com.github.jknack" % "handlebars" % "4.2.0",
"org.junit.jupiter" % "junit-jupiter" % "5.7.1" % Test
),
// unmanagedJars in Compile := {
// val libs = baseDirectory.value / "lib"
// val dirs = libs +++ (libs / "jocl-rc") +++ (libs / "jogl-rc") +++ (libs / "processing-core-custom-build")
// (dirs ** "*.jar").classpath
// }
)
4 changes: 4 additions & 0 deletions processing-app/edit-kernels.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
#
# Opens the OpenCL kernel sources (wrapped in an Xcode project) in Xcode.

# Resolve the project relative to this script rather than the caller's working
# directory, so the script works no matter where it is invoked from.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

open -a Xcode "${script_dir}/src/main/resources/cl-kernels/cl-kernels.xcodeproj"

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added processing-app/lib/jocl-rc/jocl.jar
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added processing-app/lib/jogl-rc/gluegen-rt.jar
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added processing-app/lib/jogl-rc/jogl-all.jar
Binary file not shown.
Binary file not shown.
33 changes: 33 additions & 0 deletions processing-app/src/main/java/opencl/Devices.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package opencl;

import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLDevice;

/**
 * Static helpers for picking an OpenCL device out of a {@link CLContext}.
 */
public class Devices {

    /**
     * Finds the first GPU device in the context whose vendor string contains {@code "AMD"}.
     *
     * @param context the OpenCL context whose devices are searched
     * @return the first matching device, or {@code null} when the context has no such device
     */
    public static CLDevice getAMDGPU(CLContext context) {
        return findGpuByVendor(context, "AMD");
    }

    /**
     * Finds the first GPU device in the context whose vendor string contains {@code "Intel"}.
     *
     * @param context the OpenCL context whose devices are searched
     * @return the first matching device, or {@code null} when the context has no such device
     */
    public static CLDevice getIntelGPU(CLContext context) {
        return findGpuByVendor(context, "Intel");
    }

    /**
     * Returns the CPU device the context reports as having the highest FLOPS.
     *
     * @param context the OpenCL context to query
     * @return the result of {@code context.getMaxFlopsDevice(CLDevice.Type.CPU)};
     *         NOTE(review): presumably {@code null} when no CPU device exists - confirm against JOCL
     */
    public static CLDevice getCPU(CLContext context) {
        return context.getMaxFlopsDevice(CLDevice.Type.CPU);
    }

    /**
     * Returns the first GPU device whose vendor string contains the given fragment.
     *
     * @param context        the OpenCL context whose devices are searched
     * @param vendorFragment case-sensitive substring to look for in the vendor string
     * @return the first matching device in iteration order, or {@code null} if none matches
     */
    private static CLDevice findGpuByVendor(CLContext context, String vendorFragment) {
        for (CLDevice device : context.getDevices()) {
            if (device.getVendor().contains(vendorFragment) && device.getType() == CLDevice.Type.GPU) {
                return device;
            }
        }
        return null;
    }
}
114 changes: 114 additions & 0 deletions processing-app/src/main/java/opencl/OpenGLInterop.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package opencl;

import com.jogamp.opencl.*;
import com.jogamp.opencl.gl.CLGLContext;
import com.jogamp.opencl.gl.CLGLTexture2d;
import com.jogamp.opengl.GL2;
import com.thomasdiewald.pixelflow.java.DwPixelFlow;
import com.thomasdiewald.pixelflow.java.dwgl.DwGLTexture;
import com.thomasdiewald.pixelflow.java.imageprocessing.filter.Copy;
import com.thomasdiewald.pixelflow.java.imageprocessing.filter.DwFilter;
import processing.core.PApplet;
import processing.core.PGraphics;
import processing.opengl.PGraphicsOpenGL;

import java.io.IOException;

/**
 * Processing sketch demonstrating OpenCL/OpenGL interop: an OpenGL texture is
 * shared with OpenCL, written by the {@code red} kernel each frame, and then
 * copied to the display.
 */
public class OpenGLInterop extends PApplet {

    private DwPixelFlow pixelFlow;
    private CLGLContext clContext;
    private DwGLTexture buf;
    private CLGLTexture2d<?> clglBuf;
    private CLKernel kernel;
    private CLCommandQueue queue;

    // Off-screen graphics created in setup(); not otherwise used within this class.
    PGraphics t;

    /** Configures a 1024x1024 window using the P2D (OpenGL) renderer. */
    @Override
    public void settings() {
        size(1024, 1024, P2D);
    }

    /**
     * Creates the shared CL/GL context, the shared texture, the kernel and the command queue.
     *
     * @throws RuntimeException if no AMD GPU is present in the context, or if the
     *                          selected device does not support GL memory sharing
     */
    @Override
    public void setup() {
        t = createGraphics(width, height, P2D);
        this.pixelFlow = new DwPixelFlow(this);

        // 1. Create an OpenCL context that is associated with the current OpenGL
        //    context. This allows memory to be shared between the two systems without
        //    being copied.
        clContext = CLGLContext.create(pixelFlow.pjogl.context);

        // 2. Initialise an OpenGL texture that will be used as a buffer in OpenCL.
        //    We need to initialise it in OpenGL then acquire it for use in OpenCL.
        buf = new DwGLTexture();
        buf.resize(
                pixelFlow,
                GL2.GL_RGBA32F,
                width, height,
                GL2.GL_RGBA,
                GL2.GL_FLOAT,
                GL2.GL_NEAREST,
                GL2.GL_CLAMP_TO_EDGE,
                4, // presumably the channel count - confirm against PixelFlow's DwGLTexture.resize
                4  // presumably bytes per channel - confirm against PixelFlow's DwGLTexture.resize
        );
        buf.clear(0.0f);

        // 3. Create an OpenCL object that references the OpenGL buffer that was just initialised.
        clglBuf = clContext.createFromGLTexture2d(buf.target, buf.HANDLE[0], 0);

        // 4. Initialise the OpenCL device and kernel.
        CLDevice device = Devices.getAMDGPU(clContext);
        if (device == null) {
            // Devices.getAMDGPU returns null when nothing matches; fail with a clear
            // message instead of a NullPointerException on the next line.
            throw new RuntimeException("No AMD GPU device found in the OpenCL context");
        }
        if (!device.isGLMemorySharingSupported()) {
            throw new RuntimeException("GL mem sharing not supported");
        }
        kernel = getKernel(clContext);
        kernel.putArg(clglBuf);

        // 5. When submitting the kernel to the device, we need to ensure that
        //    we also submit the commands to acquire our OpenGL buffer as well,
        //    otherwise the object will not be shared correctly (see draw()).
        queue = device.createCommandQueue();
    }

    /**
     * Runs the kernel over the shared texture and copies the result to the display.
     */
    @Override
    public void draw() {
        // The GL object must be acquired before OpenCL touches it and released afterwards.
        queue.putAcquireGLObject(clglBuf);
        queue.put2DRangeKernel(kernel,
                0, 0,          // global work offset
                width, height, // global work size: one work item per pixel
                0, 0           // local work size left at 0 (no explicit work-group size)
        );
        queue.putReleaseGLObject(clglBuf);
        queue.finish();

        // 6. Copy the texture buffer to the display
        Copy copy = DwFilter.get(pixelFlow).copy;
        copy.apply(buf, (PGraphicsOpenGL) g);
        image(g, 0, 0);
    }

    /**
     * Builds the program from the {@code /cl-kernels/gl_interop.cl} classpath
     * resource and returns its {@code red} kernel.
     *
     * @param context the OpenCL context to build the program in
     * @return the {@code red} kernel
     * @throws RuntimeException wrapping any {@link IOException} raised while reading the source
     */
    private CLKernel getKernel(CLContext context) {
        try {
            String path = "/cl-kernels/gl_interop.cl";
            CLProgram program = context.createProgram(getClass().getResourceAsStream(path)).build();
            return program.createCLKernel("red");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Launches the sketch. */
    public static void main(String[] args) {
        PApplet.main(OpenGLInterop.class);
    }
}
20 changes: 20 additions & 0 deletions processing-app/src/main/java/opencl/PrefixScanReference.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package opencl;

/**
 * Straightforward sequential prefix-sum implementations, usable as a reference
 * when checking other scan implementations.
 */
public class PrefixScanReference {

    /**
     * Writes the exclusive prefix sum of {@code input} into {@code output}:
     * {@code output[i]} is the sum of {@code input[0..i-1]}, so {@code output[0]} is 0.
     *
     * <p>An empty input writes nothing. {@code input} and {@code output} may be
     * the same array: each element is read before its slot is overwritten.
     *
     * @param input  the values to scan
     * @param output receives the scan; must be at least as long as {@code input}
     * @throws ArrayIndexOutOfBoundsException if {@code output} is shorter than {@code input}
     */
    public static void exclusiveScan(float[] input, float[] output) {
        float running = 0;
        for (int i = 0; i < input.length; i++) {
            // Read before write so that scanning in place stays correct.
            float current = input[i];
            output[i] = running;
            running += current;
        }
    }

    /**
     * Writes the inclusive prefix sum of {@code input} into {@code output}:
     * {@code output[i]} is the sum of {@code input[0..i]}.
     *
     * <p>An empty input writes nothing. {@code input} and {@code output} may be
     * the same array.
     *
     * @param input  the values to scan
     * @param output receives the scan; must be at least as long as {@code input}
     * @throws ArrayIndexOutOfBoundsException if {@code output} is shorter than {@code input}
     */
    public static void inclusiveScan(float[] input, float[] output) {
        if (input.length == 0) {
            return;
        }
        float sum = input[0];
        output[0] = sum;
        for (int j = 1; j < input.length; j++) {
            sum += input[j];
            output[j] = sum;
        }
    }
}
108 changes: 108 additions & 0 deletions processing-app/src/main/java/opencl/ScanBlur.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package opencl;

import com.jogamp.opencl.*;

import java.io.IOException;
import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.Map;

/**
 * Blurs an image on an OpenCL device in two steps: a 2D scan of the input
 * (delegated to {@link ScanImage2d}) followed by the {@code scan_blur_image}
 * kernel, which reads the scan result and writes the blurred image.
 */
public class ScanBlur {

    /** Classpath location of the OpenCL source containing the blur kernel. */
    private static final String KERNEL_SOURCE_PATH = "/cl-kernels/scan_blur.cl";

    /** Name of the blur kernel inside {@link #KERNEL_SOURCE_PATH}. */
    private static final String SCAN_BLUR_KERNEL = "scan_blur_image";

    private final CLContext context;
    final CLCommandQueue queue;
    private final Map<String, CLKernel> kernels;

    private final ScanImage2d sum;

    /**
     * Builds the blur program for the queue's device and prepares the scan stage.
     *
     * @param context the OpenCL context used to create the program
     * @param queue   the command queue all work is submitted to; its device is
     *                the build target for the kernels
     * @throws RuntimeException wrapping any {@link IOException} raised while reading the kernel source
     */
    public ScanBlur(CLContext context, CLCommandQueue queue) {
        // context must be assigned before loadKernels(), which reads this.context.
        this.context = context;
        this.queue = queue;

        this.kernels = loadKernels(queue.getDevice());
        this.sum = new ScanImage2d(context, queue);
    }

    /**
     * Builds the program at {@link #KERNEL_SOURCE_PATH} for the given device with
     * fast-relaxed-math enabled and returns all of its kernels keyed by name.
     */
    private Map<String, CLKernel> loadKernels(CLDevice device) {
        try {
            CLProgram program = this.context.createProgram(getClass().getResourceAsStream(KERNEL_SOURCE_PATH));
            return program.build(CLProgram.CompilerOptions.FAST_RELAXED_MATH, device).createCLKernels();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Scans {@code in} using {@code ping}/{@code pong} as working images, then
     * enqueues the blur kernel. Prints the wall-clock time of the scan stage to
     * standard output.
     *
     * <p>The blur output is whichever of {@code ping}/{@code pong} the scan did
     * not return. The blur kernel is only enqueued; this method does not wait
     * for it to complete.
     *
     * @param in     the image to blur
     * @param ping   first working image
     * @param pong   second working image
     * @param radius blur radius passed to the kernel
     * @return the image ({@code ping} or {@code pong}) the blur kernel writes to
     */
    public CLImage2d<?> blur(CLImage2d<?> in, CLImage2d<?> ping, CLImage2d<?> pong, int radius) {

        // Drain the queue on both sides of the scan so the timing covers only the scan.
        this.queue.finish();
        long startScan = System.currentTimeMillis();
        CLImage2d<?> scanData = this.sum.run(in, ping, pong);
        // Write the blur into the working image that does not hold the scan result.
        CLImage2d<?> blurOut = scanData.ID == ping.ID ? pong : ping;

        this.queue.finish();
        long endScan = System.currentTimeMillis();
        System.out.println("Scan Took: " + (endScan - startScan) + " ms");

        runBlurKernel(scanData, blurOut, radius, null);
        return blurOut;
    }

    /**
     * Enqueues the blur kernel over the full extent of {@code out}.
     *
     * @param scanData the scan result the kernel reads from
     * @param out      the image the kernel writes to; its width and height define the global work size
     * @param radius   blur radius passed to the kernel
     * @param events   event list handed to the enqueue call; may be {@code null}
     * @throws IllegalStateException if the built program does not contain the blur kernel
     */
    public void runBlurKernel(CLImage2d<?> scanData, CLImage2d<?> out, int radius, CLEventList events) {
        CLKernel kernel = this.kernels.get(SCAN_BLUR_KERNEL);
        if (kernel == null) {
            throw new IllegalStateException(
                    "Kernel '" + SCAN_BLUR_KERNEL + "' not found in " + KERNEL_SOURCE_PATH);
        }
        // Reset the argument index so the arguments below start at position 0 on every call.
        kernel.rewind();
        kernel
                .putArg(scanData)
                .putArg(out)
                .putArg(radius)
                .putArg(out.width)
                .putArg(out.height);

        // TODO: 16x16 seems to work well here, but not when the global size < 16x16
        // A local size of 0 is passed through as-is; NOTE(review): presumably this
        // lets the OpenCL implementation pick the work-group size - confirm against JOCL.
        int localSizeX = 0;
        int localSizeY = 0;
        this.queue.put2DRangeKernel(
                kernel,
                0, 0,
                out.width, out.height,
                localSizeX, localSizeY,
                events
        );
    }

    /**
     * Prints the entire contents of {@code buf} (indices 0 to capacity) on one
     * line, prefixed by {@code pre}. The buffer is rewound afterwards.
     */
    static void print(String pre, FloatBuffer buf) {
        float[] res = new float[buf.capacity()];
        // Read through a duplicate spanning the full capacity so the dump does not
        // underflow when buf's position is non-zero or its limit is below capacity.
        FloatBuffer view = buf.duplicate();
        view.clear();
        view.get(res);
        buf.rewind();
        System.out.println(pre + ": " + Arrays.toString(res));
    }

    /**
     * Prints {@code width * height} floats, read from the buffer's current
     * position, as {@code height} rows of {@code width} space-separated values.
     * The buffer is rewound afterwards.
     */
    static void print(FloatBuffer buf, int width, int height) {
        float[] result = new float[width * height];
        buf.get(result);
        buf.rewind();

        for (int y = 0; y < height; y++) {
            StringBuilder row = new StringBuilder();
            for (int x = 0; x < width; x++) {
                row.append(result[y * width + x]).append(' ');
            }
            System.out.println(row);
        }
    }
}
Loading