From 6866bb77d0b5b377c86e7e99e46e92035e0b76bc Mon Sep 17 00:00:00 2001
From: Eli Jordan <elias.jordan@tapad.com>
Date: Sun, 4 Apr 2021 16:16:57 +0200
Subject: [PATCH 1/2] experiment with glsl based turing patterns

---
 .../src/main/java/glslfft/AppletTest.java     |  45 ++++
 .../src/main/java/glslfft/FftBlurTest.java    |  64 ++++++
 .../src/main/java/glslfft/FftPass.java        |   2 +
 .../src/main/java/glslfft/GlslFft.java        |  75 +++----
 .../src/main/java/glslfft/GlslFftTest.java    | 133 ++++--------
 .../src/main/java/prefixsum/Buffer.java       |  31 +++
 .../src/main/java/prefixsum/PrefixSum.java    | 140 +++++++++++++
 .../main/java/prefixsum/PrefixSumBlur.java    |  75 +++++++
 .../java/prefixsum/PrefixSumBlurRender.java   |  77 +++++++
 .../java/prefixsum/PrefixSumBlurTest.java     |  57 +++++
 .../main/java/prefixsum/PrefixSumTest.java    | 197 ++++++++++++++++++
 .../main/java/turingpatterns_gpu/GpuGrid.java |  98 +++++++++
 .../java/turingpatterns_gpu/GpuScale.java     |  28 +++
 .../java/turingpatterns_gpu/GpuSketch.java    |  85 ++++++++
 .../turingpatterns_gpu/GrayscaleRender.java   |  31 +++
 .../main/resources/glslfft/circle-kernel.frag |  33 +++
 .../src/main/resources/glslfft/fft.frag       |   2 +-
 .../src/main/resources/glslfft/test.frag      |  16 --
 .../main/resources/prefixsum/prefixsum.frag   |  22 ++
 .../resources/prefixsum/render-greyscale.frag |  27 +++
 .../prefixsum/striped-prefixsum-blur.frag     |  48 +++++
 .../prefixsum/turing-pattern-step.frag        |  46 ++++
 22 files changed, 1181 insertions(+), 151 deletions(-)
 create mode 100644 processing-app/src/main/java/glslfft/AppletTest.java
 create mode 100644 processing-app/src/main/java/glslfft/FftBlurTest.java
 create mode 100644 processing-app/src/main/java/prefixsum/Buffer.java
 create mode 100644 processing-app/src/main/java/prefixsum/PrefixSum.java
 create mode 100644 processing-app/src/main/java/prefixsum/PrefixSumBlur.java
 create mode 100644 processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
 create mode 100644 processing-app/src/main/java/prefixsum/PrefixSumBlurTest.java
 create mode 100644 processing-app/src/main/java/prefixsum/PrefixSumTest.java
 create mode 100644 processing-app/src/main/java/turingpatterns_gpu/GpuGrid.java
 create mode 100644 processing-app/src/main/java/turingpatterns_gpu/GpuScale.java
 create mode 100644 processing-app/src/main/java/turingpatterns_gpu/GpuSketch.java
 create mode 100644 processing-app/src/main/java/turingpatterns_gpu/GrayscaleRender.java
 create mode 100644 processing-app/src/main/resources/glslfft/circle-kernel.frag
 delete mode 100644 processing-app/src/main/resources/glslfft/test.frag
 create mode 100644 processing-app/src/main/resources/prefixsum/prefixsum.frag
 create mode 100644 processing-app/src/main/resources/prefixsum/render-greyscale.frag
 create mode 100644 processing-app/src/main/resources/prefixsum/striped-prefixsum-blur.frag
 create mode 100644 processing-app/src/main/resources/prefixsum/turing-pattern-step.frag

diff --git a/processing-app/src/main/java/glslfft/AppletTest.java b/processing-app/src/main/java/glslfft/AppletTest.java
new file mode 100644
index 0000000..c41aa17
--- /dev/null
+++ b/processing-app/src/main/java/glslfft/AppletTest.java
@@ -0,0 +1,45 @@
+package glslfft;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import processing.core.PApplet;
+
+import java.lang.reflect.Method;
+
+public class AppletTest extends PApplet {
+
+   @Override
+   public void settings() {
+      size(0, 0, P2D);
+   }
+
+   @Override
+   public void setup() {
+
+      StringBuilder result = new StringBuilder();
+      try {
+         for(Method m : this.getClass().getDeclaredMethods()) {
+            if(m.getName().startsWith("test") && m.getParameterCount() == 0) {
+               boolean success;
+               try {
+                  println("==> Starting: " + m.getName());
+                  m.invoke(this);
+                  success = true;
+               } catch(Exception e) {
+                  e.printStackTrace(System.out);
+                  success = false;
+               }
+               println("<== Finished: " + m.getName());
+               if(success) {
+                  result.append(m.getName()).append(": ✅ \n");
+               } else {
+                  result.append(m.getName()).append(": ❌ \n");
+               }
+            }
+         }
+      } finally {
+         println("\nTest Results Summary: ");
+         println(result);
+         exit();
+      }
+   }
+}
diff --git a/processing-app/src/main/java/glslfft/FftBlurTest.java b/processing-app/src/main/java/glslfft/FftBlurTest.java
new file mode 100644
index 0000000..dba361d
--- /dev/null
+++ b/processing-app/src/main/java/glslfft/FftBlurTest.java
@@ -0,0 +1,64 @@
+package glslfft;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+import com.thomasdiewald.pixelflow.java.imageprocessing.filter.DwFilter;
+import processing.core.PApplet;
+import processing.core.PGraphics;
+import processing.opengl.PGraphicsOpenGL;
+
+import java.util.List;
+
+public class FftBlurTest extends PApplet {
+   private DwPixelFlow context;
+   private GlslFft fft;
+
+   @Override
+   public void settings() {
+      size(512, 256, P2D);
+   }
+
+   @Override
+   public void setup() {
+      context = new DwPixelFlow(this);
+      fft = new GlslFft(context);
+
+      DwGLSLProgram shader = context.createShader("glslfft/circle-kernel.frag");
+      context.begin();
+      context.beginDraw((PGraphicsOpenGL) g);
+
+      shader.begin();
+      shader.uniform2f("resolution", (float) width, (float) height);
+      shader.drawFullScreenQuad();
+      shader.end();
+
+      context.endDraw();
+      context.end();
+
+//      PGraphics d = createGraphics(width, height, P2D);
+//
+//      d.beginDraw();
+//      d.background(0);
+//      d.fill(255);
+//      d.ellipse(width/2f, height/2f, 200, 200);
+//      d.endDraw();
+
+//      GlslFft.FftBuffer in = fft.newBuffer(width, height);
+//      GlslFft.FftBuffer ping = fft.newBuffer(width, height);
+//      GlslFft.FftBuffer pong = fft.newBuffer(width, height);
+//      GlslFft.FftBuffer out = fft.newBuffer(width, height);
+//
+//      List<FftPass<GlslFft.FftBuffer>> passes = fft.forward(in, ping, pong, out, width, height);
+//      fft.runPasses(passes);
+
+      // TODO: kernel, multiply, inverse
+
+//      DwFilter.get(context).copy.apply(d, in.buf);
+//      DwFilter.get(context).copy.apply(out.buf, (PGraphicsOpenGL) g);
+
+   }
+
+   public static void main(String[] args) {
+      PApplet.main(FftBlurTest.class);
+   }
+}
diff --git a/processing-app/src/main/java/glslfft/FftPass.java b/processing-app/src/main/java/glslfft/FftPass.java
index b561885..8c3069a 100644
--- a/processing-app/src/main/java/glslfft/FftPass.java
+++ b/processing-app/src/main/java/glslfft/FftPass.java
@@ -10,7 +10,9 @@ public class FftPass<B> {
    // The output buffer is being filled by this pass
    B output;
 
+   // Normalisation factor used for the inverse transform
    float normalization;
+
    float subtransformSize;
 
    // 1/width
diff --git a/processing-app/src/main/java/glslfft/GlslFft.java b/processing-app/src/main/java/glslfft/GlslFft.java
index 115ad06..ef8d5ae 100644
--- a/processing-app/src/main/java/glslfft/GlslFft.java
+++ b/processing-app/src/main/java/glslfft/GlslFft.java
@@ -4,20 +4,17 @@
 import com.thomasdiewald.pixelflow.java.DwPixelFlow;
 import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
 import com.thomasdiewald.pixelflow.java.dwgl.DwGLTexture;
-import processing.core.PApplet;
 import turingpatterns.Complex;
 
 import java.nio.FloatBuffer;
 import java.util.ArrayList;
 import java.util.List;
-
-import static processing.core.PApplet.print;
-import static processing.core.PApplet.println;
+import static processing.core.PApplet.*;
 
 /**
  * This class provides a simplified interface to run an FFT on the GPU
  * using OpenGL and a GLSL fragment shader.
- *
+ * <p>
  * This is a port of the glsl-fft javascript library that can be found here https://github.com/rreusser/glsl-fft
  */
 public class GlslFft {
@@ -25,8 +22,8 @@ public class GlslFft {
    private final DwPixelFlow context;
    private final DwGLSLProgram fft;
 
-   public GlslFft(PApplet applet) {
-      this.context = new DwPixelFlow(applet);
+   public GlslFft(DwPixelFlow context) {
+      this.context = context;
       this.fft = this.context.createShader("glslfft/fft.frag");
    }
 
@@ -107,22 +104,16 @@ private <B> List<FftPass<B>> fftPasses(
          } else {
             pass.output = pong;
          }
-
-         if (i == 0) {
-            if (pass.forward) {
-               pass.normalization = 1;
-            } else {
-               pass.normalization = 1.0f / width / height;
-            }
-         } else {
-            pass.normalization = 1;
+         pass.normalization = 1.0f;
+         if (i == 0 && !pass.forward) {
+            pass.normalization = 1.0f / width / height;
          }
 
          pass.subtransformSize = (float) Math.pow(2, (pass.horizontal ? i : (i - xIterations)) + 1);
 
          passes.add(pass);
 
-         // Swap the image buffers
+         // Swap the buffers
          B tmp = ping;
          ping = pong;
          pong = tmp;
@@ -136,7 +127,7 @@ private <B> List<FftPass<B>> fftPasses(
     */
    public void runPasses(List<FftPass<FftBuffer>> passes) {
       int count = 0;
-      boolean debug = false;
+      boolean debug = true;
       for (FftPass<FftBuffer> pass : passes) {
          this.context.begin();
          this.context.beginDraw(pass.output.buf);
@@ -158,10 +149,10 @@ public void runPasses(List<FftPass<FftBuffer>> passes) {
 
          if(debug) {
             println("------------- Pass " + count + ": Inputs -------------");
-            printComplex(pass.input);
+            println(format(loadLayer0(pass.input)));
 
             println("------------- Pass " + count + ": Outputs -------------");
-            printComplex(pass.output);
+            println(format(loadLayer0(pass.output)));
             println();
          }
          count++;
@@ -173,9 +164,9 @@ public void runPasses(List<FftPass<FftBuffer>> passes) {
     * We can pack two complex number matrices into the four channels of one texture.
     */
    public FloatBuffer prepare(Complex[][] data0, Complex[][] data1, int width, int height) {
-      if (width != height) {
-         throw new IllegalArgumentException("For some reason the glsl based FFT doesn't work on non-square data and I haven't debugged it yet");
-      }
+//      if (width != height) {
+//         throw new IllegalArgumentException("For some reason the glsl based FFT doesn't work on non-square data and I haven't debugged it yet");
+//      }
       float[] fdata = new float[width * height * 4];
       for (int y = 0; y < height; y++) {
          for (int x = 0; x < width; x++) {
@@ -216,7 +207,7 @@ private Complex[][] loadResult(GlslFft.FftBuffer fft, int offset) {
       float[] data = fft.buf.getFloatTextureData(new float[width * height * 4]);
       for (int y = 0; y < height; y++) {
          for (int x = 0; x < width; x++) {
-            int idx = 4 * (y * fft.buf.w + x);
+            int idx = 4 * (y * width + x);
             float re = data[idx + offset];
             float im = data[idx + offset + 1];
             result[y][x] = new Complex(re, im);
@@ -225,6 +216,19 @@ private Complex[][] loadResult(GlslFft.FftBuffer fft, int offset) {
       return result;
    }
 
+   /** Renders a matrix as text, one row per line; an empty matrix yields an empty string. */
+   public String format(Complex[][] data) {
+      StringBuilder buffer = new StringBuilder();
+      for (Complex[] row : data) {
+         // Use each row's own length so empty or ragged input cannot index out of bounds
+         for (Complex value : row) {
+            buffer.append(value).append(", ");
+         }
+         buffer.append("\n");
+      }
+      return buffer.toString();
+   }
+
    /**
     * Create a new buffer the the provided dimensions that is configured to hold 2 complex number matrices.
     */
@@ -240,26 +244,12 @@ public FftBuffer newBuffer(int width, int height) {
     * {@link #prepare(Complex[][], Complex[][], int, int)}
     */
    public FftBuffer newBuffer(int width, int height, FloatBuffer data) {
-      if (width != height) {
-         throw new IllegalArgumentException("For some reason the glsl based FFT doesn't work on non-square data and I haven't debugged it yet");
-      }
+//      if (width != height) {
+//         throw new IllegalArgumentException("For some reason the glsl based FFT doesn't work on non-square data and I haven't debugged it yet");
+//      }
       return new FftBuffer(this.context, width, height, data);
    }
 
-   private void printComplex(GlslFft.FftBuffer fft) {
-      float[] data = fft.buf.getFloatTextureData(new float[4 * 4 * 4]);
-      for (int y = 0; y < 4; y++) {
-         for (int x = 0; x < 4; x++) {
-            int idx = 4 * (y * fft.buf.w + x);
-            float real = data[idx];
-            float img = data[idx + 1];
-
-            print(real + "+" + img + "j, ");
-         }
-         println();
-      }
-   }
-
    public static class FftBuffer {
       // RGBA texture, with 32-bit floats in each channel
       DwGLTexture buf = new DwGLTexture();
@@ -268,7 +258,8 @@ private FftBuffer(DwPixelFlow context, int width, int height, FloatBuffer data)
          this.buf.resize(
              context,
              GL2.GL_RGBA32F,
-             width, height,
+             width,
+             height,
              GL2.GL_RGBA,
              GL2.GL_FLOAT,
              GL2.GL_NEAREST,
diff --git a/processing-app/src/main/java/glslfft/GlslFftTest.java b/processing-app/src/main/java/glslfft/GlslFftTest.java
index fddf8d4..a410ee1 100644
--- a/processing-app/src/main/java/glslfft/GlslFftTest.java
+++ b/processing-app/src/main/java/glslfft/GlslFftTest.java
@@ -1,5 +1,6 @@
 package glslfft;
 
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
 import processing.core.PApplet;
 import turingpatterns.Complex;
 import turingpatterns.FFT;
@@ -12,45 +13,14 @@
 /**
  * This class tests the GLSL based FFT implementation
  */
-public class GlslFftTest extends PApplet {
+public class GlslFftTest extends AppletTest {
 
    private GlslFft fft;
 
-   @Override
-   public void settings() {
-      size(0, 0, P2D);
-   }
-
    @Override
    public void setup() {
-      fft = new GlslFft(this);
-
-      StringBuilder result = new StringBuilder();
-      try {
-         for(Method m :this.getClass().getDeclaredMethods()) {
-            if(m.getName().startsWith("test") && m.getParameterCount() == 0) {
-               boolean success;
-               try {
-                  println("==> Starting: " + m.getName());
-                  m.invoke(this);
-                  success = true;
-               } catch(Exception e) {
-                  e.printStackTrace(System.out);
-                  success = false;
-               }
-               println("<== Finished: " + m.getName());
-               if(success) {
-                  result.append(m.getName()).append(": ✅ \n");
-               } else {
-                  result.append(m.getName()).append(": ❌ \n");
-               }
-            }
-         }
-      } finally {
-         println("\nTest Results Summary: ");
-         println(result);
-         exit();
-      }
+      fft = new GlslFft(new DwPixelFlow(this));
+      super.setup();
    }
 
    public void testForwardFFT_Reals() {
@@ -69,7 +39,7 @@ public void testForwardFFT_Reals() {
       assertEqual(expected, actual1);
    }
 
-   public void testForwardFFT_RealAndImaginary() {
+   public void _testForwardFFT_RealAndImaginary() {
       Complex[][] data = new Complex[][]{
           {c(1, 16), c(2, 15), c(3, 14), c(4, 13)},
           {c(5, 12), c(6, 11), c(7, 10), c(8, 9)},
@@ -86,31 +56,23 @@ public void testForwardFFT_RealAndImaginary() {
    }
 
    // Non-square matrices don't work at the moment. Not sure why.
-//   public void _testForwardFFT_Reals_8x4() {
-//      List<FftPass<String>> forward = fft.forward("in", "ping", "pong", "out", 16, 8);
-//      println("Passes: ");
-//      for(FftPass<String> pass : forward) {
-//         println(pass);
-//      }
-//
-//
-//      Complex[][] data = new Complex[][]{
-//          {c(1, 0), c(2, 0), c(3, 0), c(4, 0), c(5, 0), c(6, 0), c(7, 0), c(8, 0) },
-//          {c(9, 0), c(10, 0), c(11, 0), c(12, 0), c(13, 0), c(14, 0), c(15, 0), c(16, 0) },
-//          {c(17, 0), c(18, 0), c(19, 0), c(20, 0), c(21, 0), c(22, 0), c(23, 0), c(24, 0) },
-//          {c(25, 0), c(27, 0), c(27, 0), c(28, 0), c(29, 0), c(30, 0), c(31, 0), c(32, 0) },
-//      };
-//
-//      int width = 8;
-//      int height = 4;
-//
-//      Complex[][] expected = FFT.fft2d(data);
-//      Complex[][] actual = forward(data, width, height);
-//
-//      assertEqual(expected, actual);
-//   }
-
-   public void testInverseFFT_Reals() {
+   public void testForwardFFT_Reals_NonSquare() {
+
+      Complex[][] data = new Complex[][]{
+          {c(1, 0), c(2, 0), c(3, 0), c(4, 0)},
+          {c(5, 0), c(6, 0), c(7, 0), c(8, 0)}
+      };
+
+      int width = 4;
+      int height = 2;
+
+      Complex[][] expected = FFT.fft2d(data);
+      Complex[][] actual = forward(data, null, width, height).layer0;
+
+      assertEqual(expected, actual);
+   }
+
+   public void _testInverseFFT_Reals() {
       Complex[][] data = new Complex[][]{
           {c(1, 0), c(2, 0), c(3, 0), c(4, 0)},
           {c(5, 0), c(6, 0), c(7, 0), c(8, 0)},
@@ -127,7 +89,7 @@ public void testInverseFFT_Reals() {
       assertEqual(expected, actual1);
    }
 
-   public void testInverseFFT_RealAndImaginary() {
+   public void _testInverseFFT_RealAndImaginary() {
       Complex[][] data = new Complex[][]{
           {c(1, 16), c(2, 15), c(3, 14), c(4, 13)},
           {c(5, 12), c(6, 11), c(7, 10), c(8, 9)},
@@ -143,7 +105,7 @@ public void testInverseFFT_RealAndImaginary() {
       assertEqual(expected, actual1);
    }
 
-   public void testForwardFFT_1000_Random_32x32_RealAndImaginary() {
+   public void _testForwardFFT_1000_Random_32x32_RealAndImaginary() {
       int width = 32;
       int height = 32;
 
@@ -161,7 +123,7 @@ public void testForwardFFT_1000_Random_32x32_RealAndImaginary() {
       }
    }
 
-   public void testInverseFFT_1000_Random_32x32_RealAndImaginary() {
+   public void _testInverseFFT_1000_Random_32x32_RealAndImaginary() {
       int width = 32;
       int height = 32;
 
@@ -179,7 +141,7 @@ public void testInverseFFT_1000_Random_32x32_RealAndImaginary() {
       }
    }
 
-   public void testForwardThenInverseFFT_Reals() {
+   public void _testForwardThenInverseFFT_Reals() {
       Complex[][] expected = new Complex[][]{
           {c(1, 0), c(2, 0), c(3, 0), c(4, 0)},
           {c(5, 0), c(6, 0), c(7, 0), c(8, 0)},
@@ -196,7 +158,7 @@ public void testForwardThenInverseFFT_Reals() {
       assertEqual(actualJava, actualGlsl);
    }
 
-   public void testForwardThenInverseFFT_RealAndImaginary() {
+   public void _testForwardThenInverseFFT_RealAndImaginary() {
       Complex[][] expected = new Complex[][]{
           {c(1, 16), c(2, 15), c(3, 14), c(4, 13)},
           {c(5, 12), c(6, 11), c(7, 10), c(8, 9)},
@@ -213,7 +175,7 @@ public void testForwardThenInverseFFT_RealAndImaginary() {
       assertEqual(actualJava, actualGlsl);
    }
 
-   public void testForwardThenInverseFFT_1000_Random_32x32_RealAndImaginary() {
+   public void _testForwardThenInverseFFT_1000_Random_32x32_RealAndImaginary() {
       for(int i = 0; i < 1000; i++) {
          int width = 32;
          int height = 32;
@@ -253,21 +215,8 @@ private void assertEqual(Complex[][] expected, Complex[][] actual) {
       }
       boolean eq = Arrays.deepEquals(expected, actual);
       if (!eq) {
-         throw new AssertionError("\nExpected: \n" + format(expected) + "\n Actual:\n" + format(actual));
-      }
-   }
-
-   String format(Complex[][] data) {
-      StringBuilder buffer = new StringBuilder();
-      int ydim = data.length;
-      int xdim = data[0].length;
-      for (int y = 0; y < ydim; y++) {
-         for (int x = 0; x < xdim; x++) {
-            buffer.append(data[y][x]).append(", ");
-         }
-         buffer.append("\n");
+         throw new AssertionError("\nExpected: \n" + fft.format(expected) + "\n Actual:\n" + fft.format(actual));
       }
-      return buffer.toString();
    }
 
    static class ResultLayers {
@@ -275,18 +224,18 @@ static class ResultLayers {
       Complex[][] layer1;
    }
 
-   private ResultLayers forward(Complex[][] data0, Complex[][] data1, int w, int h) {
-      FloatBuffer texData = fft.prepare(data0, data1, w, h);
-      GlslFft.FftBuffer input = fft.newBuffer(w, h, texData);
-      GlslFft.FftBuffer ping = fft.newBuffer(w, h);
-      GlslFft.FftBuffer pong = fft.newBuffer(w, h);
-      GlslFft.FftBuffer output = fft.newBuffer(w, h);
-
-      List<FftPass<GlslFft.FftBuffer>> forward = fft.forward(input, ping, pong, output, w, h);
-//      println("Forward Passes: ");
-//      for(FftPass<GlslFft.FftBuffer> pass : forward) {
-//         println(pass);
-//      }
+   private ResultLayers forward(Complex[][] data0, Complex[][] data1, int width, int height) {
+      FloatBuffer texData = fft.prepare(data0, data1, width, height);
+      GlslFft.FftBuffer input = fft.newBuffer(width, height, texData);
+      GlslFft.FftBuffer ping = fft.newBuffer(width, height);
+      GlslFft.FftBuffer pong = fft.newBuffer(width, height);
+      GlslFft.FftBuffer output = fft.newBuffer(width, height);
+
+      List<FftPass<GlslFft.FftBuffer>> forward = fft.forward(input, ping, pong, output, width, height);
+      println("Forward Passes: ");
+      for(FftPass<GlslFft.FftBuffer> pass : forward) {
+         println(pass);
+      }
 
       fft.runPasses(forward);
 
diff --git a/processing-app/src/main/java/prefixsum/Buffer.java b/processing-app/src/main/java/prefixsum/Buffer.java
new file mode 100644
index 0000000..9e1000a
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/Buffer.java
@@ -0,0 +1,31 @@
+package prefixsum;
+
+import com.jogamp.opengl.GL2;
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLTexture;
+
+import java.nio.FloatBuffer;
+
+public class Buffer {
+      // Single channel texture of 32-bit float values
+      public DwGLTexture buf = new DwGLTexture();
+
+      public Buffer(DwPixelFlow context, int width, int height, FloatBuffer data) {
+         this.buf.resize(
+             context,
+             GL2.GL_R32F,
+             width,
+             height,
+             GL2.GL_RED,
+             GL2.GL_FLOAT,
+             GL2.GL_NEAREST,
+             GL2.GL_CLAMP_TO_BORDER,
+             1,
+             4,
+             data
+         );
+         if (data == null) {
+            this.buf.clear(0.0f);
+         }
+      }
+   }
diff --git a/processing-app/src/main/java/prefixsum/PrefixSum.java b/processing-app/src/main/java/prefixsum/PrefixSum.java
new file mode 100644
index 0000000..d153ea2
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/PrefixSum.java
@@ -0,0 +1,140 @@
+package prefixsum;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+
+import java.nio.FloatBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+public class PrefixSum {
+
+   private final DwPixelFlow context;
+   private final DwGLSLProgram sum;
+
+
+   public PrefixSum(DwPixelFlow context) {
+      this.context = context;
+      this.sum = this.context.createShader("prefixsum/prefixsum.frag");
+   }
+
+   public Buffer run(Buffer input, Buffer pingBuf, Buffer pingPong, int width, int height) {
+      List<Pass<Buffer>> passes = prefixSumPasses(input, pingBuf, pingPong, width, height);
+      return passes.isEmpty() ? input : runPasses(passes); // no passes (width 1): input is the result
+   }
+
+   /**
+    * Plans the log2(width) passes of the prefix sum; pass i carries a stride of 2^i.
+    * The first pass reads {@code input}; later passes alternate between the two scratch buffers.
+    *
+    * @throws IllegalArgumentException if width or height is not a positive power of 2
+    */
+   public <B> List<Pass<B>> prefixSumPasses(B input, B pingBuf, B pingPong, int width, int height) {
+      if (width <= 0 || Integer.highestOneBit(width) != width) {
+         throw new IllegalArgumentException("width (" + width + ") is not a power of 2");
+      }
+      if (height <= 0 || Integer.highestOneBit(height) != height) {
+         throw new IllegalArgumentException("height (" + height + ") is not a power of 2");
+      }
+
+      B ping = pingBuf;
+      B pong = pingPong;
+      // width is an exact power of 2 here, so its trailing-zero count is log2(width) with no
+      // floating point rounding involved.
+      int passCount = Integer.numberOfTrailingZeros(width);
+      List<Pass<B>> passes = new ArrayList<>(passCount);
+      for (int i = 0; i < passCount; i++) {
+         Pass<B> pass = new Pass<>();
+         pass.input = (i == 0) ? input : ping;
+         pass.output = pong;
+         pass.resolutionX = 1.0f / width;
+         pass.resolutionY = 1.0f / height;
+         pass.stride = 1 << i;
+         passes.add(pass);
+
+         // Swap the buffers so the next pass reads what this one wrote
+         B tmp = ping;
+         ping = pong;
+         pong = tmp;
+      }
+
+      return passes;
+   }
+
+   public Buffer runPasses(List<Pass<Buffer>> passes) {
+      Buffer output = null;
+      for (Pass<Buffer> pass : passes) {
+         this.context.begin();
+         this.context.beginDraw(pass.output.buf);
+
+         this.sum.begin();
+         this.sum.uniformTexture("src", pass.input.buf);
+         this.sum.uniform2f("resolution", pass.resolutionX, pass.resolutionY);
+         this.sum.uniform1i("stride", pass.stride);
+         this.sum.drawFullScreenQuad();
+
+         this.sum.end();
+
+         context.endDraw();
+         context.end();
+         output = pass.output;
+      }
+
+      return output;
+   }
+
+   public static class Pass<B> {
+      B input;
+      B output;
+      float resolutionX;
+      float resolutionY;
+      int stride;
+
+      @Override
+      public String toString() {
+         return "Pass{" +
+             "input=" + input +
+             ", output=" + output +
+             ", resolutionX=" + resolutionX +
+             ", resolutionY=" + resolutionY +
+             ", stride=" + stride +
+             '}';
+      }
+   }
+
+   FloatBuffer prepare(float[][] input) {
+      int height = input.length;
+      int width = input[0].length;
+      float[] buf = new float[width * height];
+      for (int y = 0; y < height; y++) {
+         for (int x = 0; x < width; x++) {
+            int idx = y * width + x;
+            buf[idx] = input[y][x];
+         }
+      }
+      return FloatBuffer.wrap(buf);
+   }
+
+   public Buffer newBuffer(int width, int height) {
+      return new Buffer(this.context, width, height, null);
+   }
+
+   public Buffer newBuffer(int width, int height, FloatBuffer buffer) {
+      return new Buffer(this.context, width, height, buffer);
+   }
+
+   public float[][] read(Buffer sum) {
+      int width = sum.buf.w;
+      int height = sum.buf.h;
+      float[][] result = new float[height][width];
+      float[] data = sum.buf.getFloatTextureData(new float[width * height]);
+      for (int y = 0; y < height; y++) {
+         for (int x = 0; x < width; x++) {
+            int idx = y * width + x;
+            result[y][x] = data[idx];
+         }
+      }
+      return result;
+   }
+
+}
diff --git a/processing-app/src/main/java/prefixsum/PrefixSumBlur.java b/processing-app/src/main/java/prefixsum/PrefixSumBlur.java
new file mode 100644
index 0000000..a5751a6
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/PrefixSumBlur.java
@@ -0,0 +1,75 @@
+package prefixsum;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+
+import java.nio.FloatBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+public class PrefixSumBlur {
+   private final DwPixelFlow context;
+   private final PrefixSum sum;
+   private final DwGLSLProgram blur;
+
+   public PrefixSumBlur(DwPixelFlow context) {
+      this.context = context;
+      this.sum = new PrefixSum(context);
+      this.blur = this.context.createShader("prefixsum/striped-prefixsum-blur.frag");
+   }
+
+   public Buffer newBuffer(int width, int height) {
+      return sum.newBuffer(width, height);
+   }
+
+   public Buffer newBuffer(int width, int height, FloatBuffer buffer) {
+      return sum.newBuffer(width, height, buffer);
+   }
+
+   public float[][] blur(float[][] data, int radius) {
+      int h = data.length;
+      int w = data[0].length;
+      Buffer input = sum.newBuffer(w, h, sum.prepare(data));
+      Buffer ping = sum.newBuffer(w, h);
+      Buffer pong = sum.newBuffer(w, h);
+
+      List<PrefixSum.Pass<Buffer>> passes = sum.prefixSumPasses(input, ping, pong, w, h);
+      Buffer out = sum.runPasses(passes);
+
+//      System.out.println("PrefixSum");
+//      print(sum.read(out));
+
+      Buffer blurOut = out == ping ? pong : ping;
+
+      blur(out, blurOut, w, h, radius);
+      return this.sum.read(blurOut);
+   }
+
+   private void print(float[][] data) {
+      for(float[] row : data) {
+         System.out.println(Arrays.toString(row));
+      }
+   }
+
+   public void blur(
+       Buffer prefixSum,
+       Buffer output,
+       int width, int height,
+       int radius) {
+      this.context.begin();
+      this.context.beginDraw(output.buf);
+
+      this.blur.begin();
+      this.blur.uniformTexture("prefixSum", prefixSum.buf);
+      this.blur.uniform2f("resolution", 1.0f / width, 1.0f / height);
+      this.blur.uniform1i("radius", radius);
+      this.blur.uniform1i("width", width);
+      this.blur.drawFullScreenQuad();
+
+      this.blur.end();
+
+      this.context.endDraw();
+      this.context.end();
+
+   }
+}
diff --git a/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java b/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
new file mode 100644
index 0000000..459701f
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
@@ -0,0 +1,77 @@
+package prefixsum;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+import com.thomasdiewald.pixelflow.java.imageprocessing.filter.Copy;
+import com.thomasdiewald.pixelflow.java.imageprocessing.filter.DwFilter;
+import processing.core.PApplet;
+import processing.opengl.PGraphicsOpenGL;
+
+public class PrefixSumBlurRender extends PApplet {
+
+   private DwPixelFlow context;
+   private PrefixSum sum;
+   private PrefixSumBlur blur;
+
+   @Override
+   public void settings() {
+      size(512, 512, P2D);
+
+   }
+
+   @Override
+   public void setup() {
+      this.context = new DwPixelFlow(this);
+      this.sum = new PrefixSum(context);
+      this.blur = new PrefixSumBlur(context);
+
+      DwGLSLProgram shader = this.context.createShader("prefixsum/turing-pattern-step.frag");
+
+      float[][] input1 = createInput(200);
+      Buffer buffer1 = sum.newBuffer(width, height, sum.prepare(input1));
+
+      float[][] input2 = createInput(80);
+      Buffer buffer2 = sum.newBuffer(width, height, sum.prepare(input2));
+
+
+      Buffer out = sum.newBuffer(width, height);
+
+      this.context.begin();
+      this.context.beginDraw(out.buf);
+
+      shader.begin();
+      shader.uniformTexture("test[0]", buffer1.buf);
+      shader.uniformTexture("test[1]", buffer2.buf);
+      shader.uniform2f("resolution", 1.0f / width, 1.0f / height);
+      shader.drawFullScreenQuad();
+
+      shader.end();
+
+      this.context.endDraw();
+      this.context.end();
+
+      Copy copy = DwFilter.get(context).copy;
+      copy.apply(out.buf, (PGraphicsOpenGL) g);
+   }
+
+   public static void main(String[] args) {
+      PApplet.main(PrefixSumBlurRender.class);
+   }
+
+   /**
+    * Builds a height x width mask that is 1.0 strictly inside a disc of the given radius
+    * centred on (width/2, height/2) and 0.0 everywhere else.
+    */
+   private float[][] createInput(int radius) {
+      float[][] data = new float[height][width];
+      for (int y = 0; y < height; y++) {
+         for (int x = 0; x < width; x++) {
+            // x is measured against the horizontal centre, y against the vertical one
+            int cx = x - width / 2;
+            int cy = y - height / 2;
+            data[y][x] = Math.sqrt(cx * cx + cy * cy) < radius ? 1.0f : 0.0f;
+         }
+      }
+      return data;
+   }
+}
diff --git a/processing-app/src/main/java/prefixsum/PrefixSumBlurTest.java b/processing-app/src/main/java/prefixsum/PrefixSumBlurTest.java
new file mode 100644
index 0000000..60bcee8
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/PrefixSumBlurTest.java
@@ -0,0 +1,57 @@
+package prefixsum;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import glslfft.AppletTest;
+import processing.core.PApplet;
+
+import java.util.Arrays;
+
+/** Manual check that prints an 8x8 disc mask before and after a radius-1 blur. */
+public class PrefixSumBlurTest extends AppletTest {
+   private PrefixSumBlur blur;
+
+   /** Creates the blur on a fresh PixelFlow context, then defers to {@link AppletTest#setup()}. */
+   @Override
+   public void setup() {
+      this.blur = new PrefixSumBlur(new DwPixelFlow(this));
+      super.setup();
+   }
+
+   /** Prints the input mask followed by its blurred counterpart. */
+   public void testIt() {
+      float[][] input = createInput();
+      println("Input");
+      print(input);
+      println();
+
+      float[][] blurred = this.blur.blur(input, 1);
+      println("Output");
+      print(blurred);
+   }
+
+   /** Prints the matrix one row per line. */
+   private void print(float[][] data) {
+      for (float[] row : data) {
+         println(Arrays.toString(row));
+      }
+   }
+
+   /** Returns an 8x8 mask that is 1 within distance 2 of the centre and 0 elsewhere. */
+   private float[][] createInput() {
+      int w = 8;
+      int h = 8;
+      float[][] data = new float[h][w];
+      for (int y = 0; y < h; y++) {
+         int cy = y - h / 2; // rows are centred on the height
+         for (int x = 0; x < w; x++) {
+            int cx = x - w / 2; // columns are centred on the width
+            data[y][x] = Math.sqrt(cx * cx + cy * cy) < 2 ? 1.0f : 0.0f;
+         }
+      }
+      return data;
+   }
+
+   public static void main(String[] args) {
+      PApplet.main(PrefixSumBlurTest.class);
+   }
+}
diff --git a/processing-app/src/main/java/prefixsum/PrefixSumTest.java b/processing-app/src/main/java/prefixsum/PrefixSumTest.java
new file mode 100644
index 0000000..0097d00
--- /dev/null
+++ b/processing-app/src/main/java/prefixsum/PrefixSumTest.java
@@ -0,0 +1,197 @@
+package prefixsum;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import glslfft.AppletTest;
+import processing.core.PApplet;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Checks the GPU row-wise prefix sum against a CPU reference implementation. */
+public class PrefixSumTest extends AppletTest {
+
+   private PrefixSum sum;
+
+   /** Creates the prefix-sum helper on a fresh PixelFlow context, then runs the base setup. */
+   @Override
+   public void setup() {
+      this.sum = new PrefixSum(new DwPixelFlow(this));
+      super.setup();
+   }
+
+   public void test4x1() {
+      float[][] data = new float[][]{
+          {1, 2, 3, 4},
+      };
+      runTest(data);
+   }
+
+   public void test4x2() {
+      float[][] data = new float[][]{
+          {1, 2, 3, 4},
+          {5, 6, 7, 8}
+      };
+      runTest(data);
+   }
+
+   public void test4x4() {
+      float[][] data = new float[][]{
+          {1, 2, 3, 4},
+          {5, 6, 7, 8},
+          {9, 10, 11, 12},
+          {13, 14, 15, 16},
+      };
+      runTest(data);
+   }
+
+   public void test2x4() {
+      float[][] data = new float[][]{
+          {1, 2},
+          {5, 6},
+          {9, 10},
+          {13, 14},
+      };
+      runTest(data);
+   }
+
+   public void test2000_RandomInputs() {
+      for (int i = 0; i < 2000; i++) {
+         // Power-of-two dimensions from 2 to 128; named w/h so they do not shadow the sketch size.
+         int w = (int) pow(2, (int) random(1, 8));
+         int h = (int) pow(2, (int) random(1, 8));
+         println("width=" + w + ", height=" + h);
+         runTest(randomMatrix(w, h));
+      }
+   }
+
+   /**
+    * Manual benchmark comparing the GPU and CPU prefix sums on an 8192x8192 matrix.
+    * Each variant is run 20 times as warm-up before the measured run.
+    */
+   public void _testTimingsOnLargeInput() {
+      int w = 8192;
+      int h = 8192;
+      float[][] data = randomMatrix(w, h);
+
+      // GPU: warm up, then print the timing breakdown of one more run.
+      for (int i = 0; i < 20; i++) {
+         gpuPrefixSum(data, false);
+      }
+
+      gpuPrefixSum(data, true);
+
+      // CPU: warm up, then time a single run.
+      for (int i = 0; i < 20; i++) {
+         cpuPrefixSum(data);
+      }
+
+      long start = System.nanoTime();
+      cpuPrefixSum(data);
+      long end = System.nanoTime();
+
+      println("Java");
+      println("------");
+      println("  Java Version: " + (end - start) / 1000 + " micros");
+   }
+
+   /** Returns an {@code h x w} matrix filled row by row with {@code random(-1, 1)}. */
+   private float[][] randomMatrix(int w, int h) {
+      float[][] result = new float[h][w];
+      for (float[] row : result) {
+         for (int x = 0; x < w; x++) {
+            row[x] = random(-1, 1);
+         }
+      }
+      return result;
+   }
+
+   /** Runs the GPU prefix sum over {@code data} and compares it with the CPU reference. */
+   private void runTest(float[][] data) {
+      int w = data[0].length;
+      int h = data.length;
+
+      Buffer input = sum.newBuffer(w, h, sum.prepare(data));
+      Buffer ping = sum.newBuffer(w, h);
+      Buffer pong = sum.newBuffer(w, h);
+
+      List<PrefixSum.Pass<Buffer>> passes = sum.prefixSumPasses(input, ping, pong, w, h);
+      passes.forEach(PApplet::println);
+
+      Buffer output = sum.runPasses(passes);
+
+      float[][] expected = cpuPrefixSum(data);
+      float[][] actual = sum.read(output);
+
+      assertEquals(expected, actual);
+   }
+
+   /**
+    * Throws an {@link AssertionError} listing every cell where the matrices differ by more than 0.05.
+    */
+   private void assertEquals(float[][] expected, float[][] actual) {
+      StringBuilder builder = new StringBuilder();
+      // Walk the expected matrix itself; the sketch's width/height are unrelated to the data size.
+      for (int y = 0; y < expected.length; y++) {
+         for (int x = 0; x < expected[y].length; x++) {
+            float delta = Math.abs(expected[y][x] - actual[y][x]);
+            if (delta > 0.05) {
+               builder.append("Mismatch at: (" + x + ", " + y + "): Expected: " + expected[y][x] + ", Actual: " + actual[y][x] + "\n");
+            }
+         }
+      }
+
+      if (builder.length() > 0) {
+         throw new AssertionError(
+             builder + "\nExpected: \n" + Arrays.deepToString(expected) +
+                 "\n Actual:\n" + Arrays.deepToString(actual));
+      }
+   }
+
+   /** GPU prefix sum of {@code data}, read back to the CPU; optionally prints a timing breakdown. */
+   private float[][] gpuPrefixSum(float[][] data, boolean printTimings) {
+      int w = data[0].length;
+      int h = data.length;
+      long start = System.nanoTime();
+      Buffer input = sum.newBuffer(w, h, sum.prepare(data));
+      Buffer ping = sum.newBuffer(w, h);
+      Buffer pong = sum.newBuffer(w, h);
+      long allocate = System.nanoTime();
+
+      List<PrefixSum.Pass<Buffer>> passes = sum.prefixSumPasses(input, ping, pong, w, h);
+      Buffer output = sum.runPasses(passes);
+      println(passes);
+      long passExec = System.nanoTime();
+
+      float[][] read = sum.read(output);
+      long end = System.nanoTime();
+
+      if (printTimings) {
+         println("Shader");
+         println("------");
+         println("Buffer Allocation: " + (allocate - start) / 1000 + " micros");
+         println("   Pass Execution: " + (passExec - allocate) / 1000 + " micros");
+         println("     Read Texture: " + (end - passExec) / 1000 + " micros");
+         println("            Total: " + (end - start) / 1000 + " micros");
+         println();
+      }
+
+      return read;
+   }
+
+   /** CPU reference: the inclusive prefix sum of every row of {@code data}. */
+   private float[][] cpuPrefixSum(float[][] data) {
+      float[][] result = new float[data.length][data[0].length];
+      for (int i = 0; i < data.length; i++) {
+         float running = 0;
+         for (int j = 0; j < data[i].length; j++) {
+            running += data[i][j];
+            result[i][j] = running;
+         }
+      }
+      return result;
+   }
+
+   public static void main(String[] args) {
+      PApplet.main(PrefixSumTest.class);
+   }
+}
diff --git a/processing-app/src/main/java/turingpatterns_gpu/GpuGrid.java b/processing-app/src/main/java/turingpatterns_gpu/GpuGrid.java
new file mode 100644
index 0000000..9b4e903
--- /dev/null
+++ b/processing-app/src/main/java/turingpatterns_gpu/GpuGrid.java
@@ -0,0 +1,98 @@
+package turingpatterns_gpu;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+import prefixsum.Buffer;
+import prefixsum.PrefixSum;
+import processing.core.PApplet;
+
+import java.nio.FloatBuffer;
+import java.util.List;
+
+/** Turing-pattern grid state on the GPU: the current grid plus two scratch buffers. */
+public class GpuGrid {
+   private int width, height;
+
+   private PApplet applet;
+   private DwPixelFlow context;
+   private PrefixSum prefixSum;
+   private DwGLSLProgram turingStep;
+
+   Buffer grid;
+   Buffer bufA;
+   Buffer bufB;
+
+   List<GpuScale> scales;
+
+   /** Allocates the buffers, seeds the grid with random values and loads the step shader. */
+   GpuGrid(int width, int height, PApplet applet, DwPixelFlow context) {
+      this.width = width;
+      this.height = height;
+      this.applet = applet;
+      this.context = context;
+
+      this.prefixSum = new PrefixSum(this.context);
+      this.grid = initGrid(width, height);
+      this.bufA = this.prefixSum.newBuffer(width, height);
+      this.bufB = this.prefixSum.newBuffer(width, height);
+
+      this.turingStep = this.context.createShader("prefixsum/turing-pattern-step.frag");
+   }
+
+   /** Sets the scales used by {@link #update()}; must be called before the first update. */
+   void setScales(List<GpuScale> scales) {
+      this.scales = scales;
+   }
+
+   /** Returns a new buffer holding {@code width * height} values from {@code applet.random(-1, 1)}. */
+   private Buffer initGrid(int width, int height) {
+      float[] seed = new float[width * height];
+      for (int i = 0; i < seed.length; i++) {
+         seed[i] = applet.random(-1, 1);
+      }
+      return this.prefixSum.newBuffer(width, height, FloatBuffer.wrap(seed));
+   }
+
+   /**
+    * Advances the simulation one step: prefix-sums the grid, lets every scale blur from that
+    * sum, then runs the step shader. Returns the buffer produced by {@code PrefixSum.run}.
+    */
+   Buffer update() {
+      Buffer summed = this.prefixSum.run(this.grid, this.bufA, this.bufB, width, height);
+      for (GpuScale scale : this.scales) {
+         scale.applyBlur(summed);
+      }
+
+      runTuringStep();
+
+      return summed;
+   }
+
+   /** Renders the step shader into {@code bufA}, then swaps {@code bufA} with {@code grid}. */
+   void runTuringStep() {
+      this.context.begin();
+      this.context.beginDraw(this.bufA.buf);
+      this.turingStep.begin();
+
+      this.turingStep.uniformTexture("grid", this.grid.buf);
+      this.turingStep.uniform1i("scaleCount", this.scales.size());
+      this.turingStep.uniform2f("resolution", 1.0f / width, 1.0f / height);
+      for (int i = 0; i < this.scales.size(); i++) {
+         GpuScale current = this.scales.get(i);
+         this.turingStep.uniformTexture("activator[" + i + "]", current.activator.buf);
+         this.turingStep.uniformTexture("inhibitor[" + i + "]", current.inhibitor.buf);
+         this.turingStep.uniform1f("bumpAmount[" + i + "]", current.config.smallAmount);
+      }
+
+      this.turingStep.drawFullScreenQuad();
+
+      this.turingStep.end();
+      this.context.endDraw();
+      this.context.end();
+
+      // The freshly rendered bufA becomes the grid; the old grid is the next render target.
+      Buffer previous = this.grid;
+      this.grid = this.bufA;
+      this.bufA = previous;
+   }
+}
diff --git a/processing-app/src/main/java/turingpatterns_gpu/GpuScale.java b/processing-app/src/main/java/turingpatterns_gpu/GpuScale.java
new file mode 100644
index 0000000..c6dce16
--- /dev/null
+++ b/processing-app/src/main/java/turingpatterns_gpu/GpuScale.java
@@ -0,0 +1,28 @@
+package turingpatterns_gpu;
+
+import prefixsum.Buffer;
+import prefixsum.PrefixSumBlur;
+import turingpatterns.config.ScaleConfig;
+
+/** One pattern scale: its config plus the activator and inhibitor buffers it blurs into. */
+public class GpuScale {
+   ScaleConfig config;
+   PrefixSumBlur blur;
+
+   Buffer activator;
+   Buffer inhibitor;
+
+   /** Allocates both output buffers at the size given by {@code config}. */
+   GpuScale(ScaleConfig config, PrefixSumBlur blur) {
+      this.config = config;
+      this.blur = blur;
+      activator = blur.newBuffer(config.width, config.height);
+      inhibitor = blur.newBuffer(config.width, config.height);
+   }
+
+   /** Blurs {@code inputPrefixSum} into the activator, then the inhibitor, at their own radii. */
+   void applyBlur(Buffer inputPrefixSum) {
+      blur.blur(inputPrefixSum, activator, config.width, config.height, config.activatorRadius);
+      blur.blur(inputPrefixSum, inhibitor, config.width, config.height, config.inhibitorRadius);
+   }
+}
diff --git a/processing-app/src/main/java/turingpatterns_gpu/GpuSketch.java b/processing-app/src/main/java/turingpatterns_gpu/GpuSketch.java
new file mode 100644
index 0000000..d1ce52f
--- /dev/null
+++ b/processing-app/src/main/java/turingpatterns_gpu/GpuSketch.java
@@ -0,0 +1,85 @@
+package turingpatterns_gpu;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.imageprocessing.filter.Copy;
+import com.thomasdiewald.pixelflow.java.imageprocessing.filter.DwFilter;
+import prefixsum.Buffer;
+import prefixsum.PrefixSumBlur;
+import processing.core.PApplet;
+import processing.opengl.PGraphicsOpenGL;
+import turingpatterns.config.ScaleConfig;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Sketch that runs the GPU Turing-pattern grid and displays the first scale's activator buffer. */
+public class GpuSketch extends PApplet {
+   GrayscaleRender render;
+   PGraphicsOpenGL target;
+
+   DwPixelFlow context;
+   GpuGrid grid;
+
+   @Override
+   public void settings() {
+      size(512, 512, P2D);
+   }
+
+   /** Builds the PixelFlow context, the render target and a grid with a single scale. */
+   @Override
+   public void setup() {
+      context = new DwPixelFlow(this);
+      target = (PGraphicsOpenGL) createGraphics(width, height, P2D);
+      PrefixSumBlur blur = new PrefixSumBlur(context);
+      render = new GrayscaleRender(context);
+      grid = new GpuGrid(
+          width,
+          height,
+          this,
+          context
+      );
+
+      ScaleConfig config = ScaleConfig.newBuilder()
+          .size(width, height)
+          .smallAmount(0.05f)
+          .inhibitorRadius(50)
+          .activatorRadius(25)
+          .build();
+      List<GpuScale> scales = new ArrayList<>();
+      scales.add(new GpuScale(config, blur));
+      grid.setScales(scales);
+
+      // draw() runs once per second.
+      frameRate(1);
+   }
+
+   /**
+    * Pauses the calling thread for {@code m} milliseconds. If the thread is interrupted the
+    * method returns early and restores the interrupt flag so callers can still observe it.
+    */
+   private void sleep(long m) {
+      try {
+         Thread.sleep(m);
+      } catch (InterruptedException e) {
+         Thread.currentThread().interrupt();
+      }
+   }
+
+   /** Advances the grid one step, renders the first scale's activator and draws it to the screen. */
+   void step() {
+      this.grid.update();
+      GpuScale scale = this.grid.scales.get(0);
+      this.render.render(scale.activator, this.target);
+
+      image(target, 0, 0);
+   }
+
+   @Override
+   public void draw() {
+      step();
+   }
+
+   public static void main(String[] args) {
+      PApplet.main(GpuSketch.class);
+   }
+}
diff --git a/processing-app/src/main/java/turingpatterns_gpu/GrayscaleRender.java b/processing-app/src/main/java/turingpatterns_gpu/GrayscaleRender.java
new file mode 100644
index 0000000..4c2dbfc
--- /dev/null
+++ b/processing-app/src/main/java/turingpatterns_gpu/GrayscaleRender.java
@@ -0,0 +1,31 @@
+package turingpatterns_gpu;
+
+import com.thomasdiewald.pixelflow.java.DwPixelFlow;
+import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
+import prefixsum.Buffer;
+import processing.opengl.PGraphicsOpenGL;
+
+/** Draws a buffer as greyscale using the {@code prefixsum/render-greyscale.frag} shader. */
+public class GrayscaleRender {
+   private DwPixelFlow context;
+   DwGLSLProgram shader;
+
+   public GrayscaleRender(DwPixelFlow context) {
+      this.context = context;
+      this.shader = context.createShader("prefixsum/render-greyscale.frag");
+   }
+
+   /** Runs the shader over a full-screen quad into {@code g}, sampling from {@code grid}. */
+   public void render(Buffer grid, PGraphicsOpenGL g) {
+      context.begin();
+      context.beginDraw(g);
+      shader.begin();
+      shader.uniformTexture("grid", grid.buf);
+      shader.uniform2f("resolution", 1.0f / g.width, 1.0f / g.height);
+      shader.drawFullScreenQuad();
+      shader.end();
+
+      context.endDraw();
+      context.end();
+   }
+}
diff --git a/processing-app/src/main/resources/glslfft/circle-kernel.frag b/processing-app/src/main/resources/glslfft/circle-kernel.frag
new file mode 100644
index 0000000..4c20fda
--- /dev/null
+++ b/processing-app/src/main/resources/glslfft/circle-kernel.frag
@@ -0,0 +1,33 @@
+#version 410
+
+#ifdef GL_ES
+precision highp float;
+precision mediump int;
+#endif
+
+const float PI = 3.14159265358979;
+// Compared directly against gl_FragCoord.xy below, so presumably the target size in pixels — confirm.
+uniform vec2 resolution;
+
+out vec4 glFragColor;
+// Squared Euclidean distance between a and b.
+float distanceSq(vec2 a, vec2 b) {
+    vec2 c = a - b;
+    return dot(c, c);
+}
+// Writes to r, g and b the number of corners of the (0,0)-resolution rectangle closer than radius.
+void main() {
+    float radius = 100;
+    float radiusSq = radius * radius;
+    // Squared distances are compared against radiusSq, so no sqrt is needed.
+
+    float d1 = distanceSq(gl_FragCoord.xy, vec2(0, 0));
+    float d2 = distanceSq(gl_FragCoord.xy, resolution);
+    float d3 = distanceSq(gl_FragCoord.xy, vec2(0, resolution.y));
+    float d4 = distanceSq(gl_FragCoord.xy, vec2(resolution.x, 0));
+    // step(edge, x) is 0 where x < edge, so each component is 1 for a corner inside the radius.
+    vec4 vs = 1-step(radiusSq, vec4(d1, d2, d3, d4));
+    float sum = vs.x + vs.y + vs.z + vs.w;
+
+    glFragColor = vec4(vec3(sum), 1.0);
+}
diff --git a/processing-app/src/main/resources/glslfft/fft.frag b/processing-app/src/main/resources/glslfft/fft.frag
index 65fb656..fceb8e0 100644
--- a/processing-app/src/main/resources/glslfft/fft.frag
+++ b/processing-app/src/main/resources/glslfft/fft.frag
@@ -1,4 +1,4 @@
-#version 410
+#version 150
 
 // This shader is essentially a copy of this project https://github.com/rreusser/glsl-fft
 // with some minor adaptions to allow it to run in the processing environment.
diff --git a/processing-app/src/main/resources/glslfft/test.frag b/processing-app/src/main/resources/glslfft/test.frag
deleted file mode 100644
index 5d810f1..0000000
--- a/processing-app/src/main/resources/glslfft/test.frag
+++ /dev/null
@@ -1,16 +0,0 @@
-#version 410
-
-#ifdef GL_ES
-precision highp float;
-precision mediump int;
-#endif
-
-uniform vec2 resolution;
-uniform sampler2D src;
-
-out vec4 glFragColor;
-
-void main() {
-    vec2 pos = gl_FragCoord.xy / resolution;
-    glFragColor = texture(src, pos);
-}
diff --git a/processing-app/src/main/resources/prefixsum/prefixsum.frag b/processing-app/src/main/resources/prefixsum/prefixsum.frag
new file mode 100644
index 0000000..644875b
--- /dev/null
+++ b/processing-app/src/main/resources/prefixsum/prefixsum.frag
@@ -0,0 +1,22 @@
+#version 150
+
+#ifdef GL_ES
+precision highp float;
+precision highp int;
+#endif
+// One pass of a horizontal scan: each texel is added to the texel "stride" pixels to its left.
+uniform sampler2D src;
+uniform vec2 resolution;
+uniform int stride;
+
+out float glFragColor;
+
+void main() {
+    vec2 p0 = (gl_FragCoord.xy - vec2(stride, 0)) * resolution;
+    vec2 p1 = gl_FragCoord.xy * resolution;
+    // NOTE(review): for x < stride, p0 lies left of the texture; v0 then depends on src's wrap mode — confirm it reads 0.
+    float v0 = texture(src, p0).x;
+    float v1 = texture(src, p1).x;
+
+    glFragColor = v0 + v1;
+}
diff --git a/processing-app/src/main/resources/prefixsum/render-greyscale.frag b/processing-app/src/main/resources/prefixsum/render-greyscale.frag
new file mode 100644
index 0000000..3edd31a
--- /dev/null
+++ b/processing-app/src/main/resources/prefixsum/render-greyscale.frag
@@ -0,0 +1,27 @@
+#version 150
+
+#ifdef GL_ES
+precision highp float;
+precision highp int;
+#endif
+
+#define MAX_SCALES 4 // not referenced in this shader
+
+uniform sampler2D grid;
+uniform vec2 resolution;
+
+out vec4 glFragColor;
+// Linearly remaps value from [start1, stop1] to [start2, stop2]; the result is not clamped.
+float map(
+float value,
+float start1, float stop1,
+float start2, float stop2) {
+    return start2 + (stop2 - start2) * ((value - start1) / (stop1 - start1));
+}
+
+// Shows grid.x as grey: values are remapped from [-1, 1] to [0, 1] and written to r, g and b.
+void main() {
+    vec2 texCoord = gl_FragCoord.xy * resolution;
+    float value = map(texture(grid, texCoord).x, -1, 1, 0, 1);
+    glFragColor = vec4(vec3(value), 1.0);
+}
diff --git a/processing-app/src/main/resources/prefixsum/striped-prefixsum-blur.frag b/processing-app/src/main/resources/prefixsum/striped-prefixsum-blur.frag
new file mode 100644
index 0000000..82bebae
--- /dev/null
+++ b/processing-app/src/main/resources/prefixsum/striped-prefixsum-blur.frag
@@ -0,0 +1,48 @@
+#version 150
+
+#ifdef GL_ES
+precision highp float;
+precision highp int;
+#endif
+
+const float PI = 3.14159265358979323846;
+// prefixSum is expected to hold per-row running sums; resolution scales pixel coordinates to texture coordinates.
+uniform sampler2D prefixSum;
+uniform vec2 resolution;
+uniform int width;
+uniform int radius;
+
+out float glFragColor;
+
+/* Reference implementation kept for comparison (not GLSL, never compiled): brute-force disc average.
+float circleAverage(constant float *grid, uint2 center, int r, int w, int h) {
+   int count = 0;
+   float total = 0;
+   for(int x = -r; x < r; x++) {
+      int yBound = floor(sqrt((float) r*r - x*x));
+      for (int y = -yBound; y < yBound; y++) {
+
+        int ix = x + center.x;
+        int iy = y + center.y;
+
+        if (ix >= 0 && ix < w && iy >= 0 && iy < h) {
+          count++;
+          total += grid[linear_index(ix, iy, w)];
+        }
+      }
+   }
+
+   return total / (float) count;
+}
+*/
+// Sums the disc one row at a time: each row adds prefixSum(right) - prefixSum(left) for that row's chord.
+void main() {
+    float sum = 0.0;
+    for (int y = -radius; y <= radius; y++) {
+        float xBound = sqrt(radius*radius - y*y);
+        vec2 left = vec2(clamp(gl_FragCoord.x - xBound, 0, width - 1), gl_FragCoord.y + y);
+        vec2 right = vec2(clamp(gl_FragCoord.x + xBound, 0, width - 1), gl_FragCoord.y + y);
+        sum += texture(prefixSum, right * resolution).x - texture(prefixSum, left * resolution).x;
+    }
+    glFragColor = sum / (PI*radius*radius); // NOTE(review): divides by the full disc area even where left/right were clamped at the edge
+}
diff --git a/processing-app/src/main/resources/prefixsum/turing-pattern-step.frag b/processing-app/src/main/resources/prefixsum/turing-pattern-step.frag
new file mode 100644
index 0000000..ed19a40
--- /dev/null
+++ b/processing-app/src/main/resources/prefixsum/turing-pattern-step.frag
@@ -0,0 +1,46 @@
+#version 150
+
+#ifdef GL_ES
+precision highp float;
+precision highp int;
+#endif
+
+#define MAX_SCALES 4
+
+uniform sampler2D grid;
+// Per-scale inputs; only the first scaleCount entries are read.
+uniform sampler2D activator[MAX_SCALES];
+uniform sampler2D inhibitor[MAX_SCALES];
+uniform float bumpAmount[MAX_SCALES];
+
+uniform int scaleCount;
+// Multiplied with gl_FragCoord.xy to obtain texture coordinates.
+uniform vec2 resolution;
+
+out float glFragColor;
+// Linearly remaps value from [start1, stop1] to [start2, stop2]; the result is not clamped.
+float map(
+    float value,
+    float start1, float stop1,
+    float start2, float stop2) {
+    return start2 + (stop2 - start2) * ((value - start1) / (stop1 - start1));
+}
+// Uses the scale whose activator and inhibitor differ least and bumps the grid towards its activator.
+void main() {
+    vec2 texCoord = gl_FragCoord.xy * resolution;
+    float minVariation = 999.0;
+    float bump = 0.0;
+    for(int i = 0; i < scaleCount; i++) {
+        float activatorValue = texture(activator[i], texCoord).x;
+        float inhibitorValue = texture(inhibitor[i], texCoord).x;
+        float variation = abs(activatorValue - inhibitorValue);
+        if(variation < minVariation) {
+            minVariation = variation;
+            bump = activatorValue > inhibitorValue ? bumpAmount[i] : -bumpAmount[i];
+        }
+    }
+
+    float value = texture(grid, texCoord).x + bump;
+    // Normalise by the bump's magnitude: a signed bump shrank the range and pushed results below -1.
+    glFragColor = map(value, -1 - abs(bump), 1 + abs(bump), -1, 1);
+}

From 891e05abf864d2c523c317512e11eba88aaa986b Mon Sep 17 00:00:00 2001
From: Eli Jordan <elias.jordan@tapad.com>
Date: Tue, 6 Apr 2021 01:12:42 +0200
Subject: [PATCH 2/2] create a test that can be used to compare perf of glsl vs
 opencl

---
 .../main/java/prefixsum/PrefixSumBlur.java    |  8 +++++
 .../java/prefixsum/PrefixSumBlurRender.java   | 35 +++++--------------
 2 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/processing-app/src/main/java/prefixsum/PrefixSumBlur.java b/processing-app/src/main/java/prefixsum/PrefixSumBlur.java
index a5751a6..2f4e7a6 100644
--- a/processing-app/src/main/java/prefixsum/PrefixSumBlur.java
+++ b/processing-app/src/main/java/prefixsum/PrefixSumBlur.java
@@ -33,15 +33,23 @@ public float[][] blur(float[][] data, int radius) {
       Buffer ping = sum.newBuffer(w, h);
       Buffer pong = sum.newBuffer(w, h);
 
+      long start, end;
+
+      start = System.currentTimeMillis();
       List<PrefixSum.Pass<Buffer>> passes = sum.prefixSumPasses(input, ping, pong, w, h);
       Buffer out = sum.runPasses(passes);
+      end = System.currentTimeMillis();
+      System.out.println("scan: " + (end - start) + " millis");
 
 //      System.out.println("PrefixSum");
 //      print(sum.read(out));
 
       Buffer blurOut = out == ping ? pong : ping;
 
+      start = System.currentTimeMillis();
       blur(out, blurOut, w, h, radius);
+      end = System.currentTimeMillis();
+      System.out.println("blur: " + (end - start) + " millis");
       return this.sum.read(blurOut);
    }
 
diff --git a/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java b/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
index 459701f..8903e22 100644
--- a/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
+++ b/processing-app/src/main/java/prefixsum/PrefixSumBlurRender.java
@@ -1,7 +1,6 @@
 package prefixsum;
 
 import com.thomasdiewald.pixelflow.java.DwPixelFlow;
-import com.thomasdiewald.pixelflow.java.dwgl.DwGLSLProgram;
 import com.thomasdiewald.pixelflow.java.imageprocessing.filter.Copy;
 import com.thomasdiewald.pixelflow.java.imageprocessing.filter.DwFilter;
 import processing.core.PApplet;
@@ -15,7 +14,7 @@ public class PrefixSumBlurRender extends PApplet {
 
    @Override
    public void settings() {
-      size(512, 512, P2D);
+      size(1024, 1024, P2D);
 
    }
 
@@ -25,33 +24,15 @@ public void setup() {
       this.sum = new PrefixSum(context);
       this.blur = new PrefixSumBlur(context);
 
-      DwGLSLProgram shader = this.context.createShader("prefixsum/turing-pattern-step.frag");
-
-      float[][] input1 = createInput(200);
-      Buffer buffer1 = sum.newBuffer(width, height, sum.prepare(input1));
-
-      float[][] input2 = createInput(80);
-      Buffer buffer2 = sum.newBuffer(width, height, sum.prepare(input2));
-
-
-      Buffer out = sum.newBuffer(width, height);
-
-      this.context.begin();
-      this.context.beginDraw(out.buf);
-
-      shader.begin();
-      shader.uniformTexture("test[0]", buffer1.buf);
-      shader.uniformTexture("test[1]", buffer2.buf);
-      shader.uniform2f("resolution", 1.0f / width, 1.0f / height);
-      shader.drawFullScreenQuad();
-
-      shader.end();
-
-      this.context.endDraw();
-      this.context.end();
+      long start = System.currentTimeMillis();
+      float[][] input = createInput(100);
+      float[][] blurResult = this.blur.blur(input, 300);
+      Buffer buffer = sum.newBuffer(width, height, sum.prepare(blurResult));
 
       Copy copy = DwFilter.get(context).copy;
-      copy.apply(out.buf, (PGraphicsOpenGL) g);
+      copy.apply(buffer.buf, (PGraphicsOpenGL) g);
+      long end = System.currentTimeMillis();
+      System.out.println("Time: " + (end - start) + " millis");
    }
 
    public static void main(String[] args) {