From 94db3e70c8c5018c4ad31e0c8345b5eef0b95b5a Mon Sep 17 00:00:00 2001
From: Kevin DeVries <kevin@leaflabs.com>
Date: Fri, 6 Dec 2024 08:42:54 -0500
Subject: [PATCH] Resolved: Incorrect implementation of copying chunkSize != 1
 data sets in DataBuffer. Some optimizations were made when copying contiguous
 data.

---
 Source/Processors/DataThreads/DataBuffer.cpp | 123 ++++++++++++++-----
 Source/Processors/DataThreads/DataBuffer.h   |  29 ++++-
 2 files changed, 119 insertions(+), 33 deletions(-)

diff --git a/Source/Processors/DataThreads/DataBuffer.cpp b/Source/Processors/DataThreads/DataBuffer.cpp
index 4026db0e01..1426545fb1 100755
--- a/Source/Processors/DataThreads/DataBuffer.cpp
+++ b/Source/Processors/DataThreads/DataBuffer.cpp
@@ -68,45 +68,104 @@ int DataBuffer::addToBuffer (float* data,
                              int numItems,
                              int chunkSize)
 {
-    int startIndex1, blockSize1, startIndex2, blockSize2;
-
-    abstractFifo.prepareToWrite (numItems, startIndex1, blockSize1, startIndex2, blockSize2);
-
-    int bs[3] = { blockSize1, blockSize2, 0 };
-    int si[2] = { startIndex1, startIndex2 };
-    int cSize = 0;
-    int idx = 0;
-    int blkIdx;
-        
-    for (int i = 0; bs[i] != 0; ++i)
-    {                                // for each of the dest blocks we can write to...
-        blkIdx = 0;
-        for (int j = 0; j < bs[i]; j+= chunkSize) 
-        {                     // for each chunk...
-            cSize = chunkSize <= bs[i] - j ? chunkSize : bs[i] - j;     // figure our how much you can write
-            for (int chan = 0; chan < numChans; ++chan)         // write that much, per channel
-            {
-                buffer.copyFrom (chan,                           // (int destChannel)
-                                 si[i] + j,                      // (int destStartSample)
-                                 data + (idx * numChans) + chan, // (const float* source)
-                                 cSize);                         // (int num samples)
-            }
+    int bs[] = { -1, -1, 0 };    // zero-termintated as check in outer loop
+    int si[] = { -1, -1};
+    int cSize = chunkSize;
+    int offset = 0;
+    
+    abstractFifo.prepareToWrite (numItems, si[0], bs[0], si[1], bs[1]);
 
-            for (int k = 0; k < cSize; ++k)
+    // The contiguous case is separated from the interleaved version to reduce the
+    // overhead of the for loops and small copies.
+    if (numItems == chunkSize)
+    {
+        for (int chan = 0; chan < numChans; ++chan)
+        {
+            // copy first half
+            buffer.copyFrom (chan,                       // destination channel
+                             si[0],                // offset into buffer
+                             data + chunkSize * chan,    // source data location
+                             bs[0]);                // number of samples
+
+            // copy remaining (if any)
+            if (bs[1] != 0)
             {
-                sampleNumberBuffer[si[i] + blkIdx + k] = sampleNumbers[idx + k];
-                timestampBuffer[si[i] + blkIdx + k] = timestamps[idx + k];
-                eventCodeBuffer[si[i] + blkIdx + k] = eventCodes[idx + k];
+                buffer.copyFrom (chan,
+                                 si[1],
+                                 data + chunkSize * chan + bs[0],
+                                 bs[1]);
             }
-            idx     += cSize;
-            blkIdx  += cSize;
         }
     }
+    else
+    {
+      for (int i = 0; bs[i] != 0; ++i)
+      {                                // for each of the dest blocks we can write to...
+          for (int j = 0; j < bs[i]; )
+          {                     // for each chunk...
+              // If there wasn't enough room at the end of the circular buffer
+              // to copy the full chunk, the rest of that chunk needs to be
+              // copied to the start of the circular buffer.
+              // The bounds check is not necessary since the data is being
+              // copied to the beginning of the circular buffer
+              // (si[1] should always be 0).
+              if (cSize != chunkSize)
+              {
+                  offset = cSize;              // previous amount copied
+                  cSize = chunkSize - cSize;   // remaining to copy
+              }
+              else
+              {
+                  offset = 0;
+                  cSize = chunkSize <= bs[i] - j ? chunkSize : bs[i] - j;     // prevent out-of-bounds on buffer
+              }
+
+              for (int chan = 0; chan < numChans; ++chan)
+              {
+                  buffer.copyFrom (chan,
+                                   si[i] + j,
+                                   data + chunkSize * chan + offset,
+                                   cSize);
+              }
+
+              j += cSize;   // advance the loop counter based on copied amount
+
+              //  advance source data pointer, continue with full sized chunks.
+              if ((cSize == chunkSize) || (i != 0))
+              {
+                data += chunkSize * numChans;
+                cSize = chunkSize;
+              }
+              // else keep data and cSize to get 'remainder' of current group to
+              // paste into the start of the circular buffer on next iteration.
+              // At this point, internal loop should complete and the external
+              // loop should go to next start/size set.
+          }
+      }
+    }
+
+    // Copy the other items; all of this information is contiguous,
+    // so up to two copies are needed for wrap-around on the circular buffers.
+
+    memcpy(sampleNumberBuffer + si[0], sampleNumbers, bs[0] * sizeof(sampleNumbers[0]));
+    if (bs[1] != 0) {
+      memcpy(sampleNumberBuffer, sampleNumbers + bs[0], bs[1] * sizeof(sampleNumbers[0]));
+    }
+
+    memcpy(timestampBuffer + si[0], timestamps, bs[0] * sizeof(timestamps[0]));
+    if (bs[1] != 0) {
+      memcpy(timestampBuffer, timestamps + bs[0], bs[1] * sizeof(timestamps[0]));
+    }
+
+    memcpy(eventCodeBuffer + si[0], eventCodes, bs[0] * sizeof(eventCodes[0]));
+    if (bs[1] != 0) {
+      memcpy(eventCodeBuffer, eventCodes + bs[0], bs[1] * sizeof(eventCodes[0]));
+    }
 
-    // finish write
-    abstractFifo.finishedWrite (idx);
+    // maintain state of the circular buffer by updating the last written position.
+    abstractFifo.finishedWrite (bs[0] + bs[1]);
 
-    return idx;
+    return bs[0] + bs[1];
 }
 
 
diff --git a/Source/Processors/DataThreads/DataBuffer.h b/Source/Processors/DataThreads/DataBuffer.h
index d9cf6f8c67..3e30209a61 100755
--- a/Source/Processors/DataThreads/DataBuffer.h
+++ b/Source/Processors/DataThreads/DataBuffer.h
@@ -46,6 +46,32 @@ class PLUGIN_API DataBuffer
     void clear();
 
     /** Add an array of floats to the buffer.
+        The data is laid out as a set of channels with a series of samples per channel.
+        The number of samples (chunkSize) in each sequence can be from 1..numItems.
+
+        Examples:
+        In the following examples, the numbers shown in the data indicate the channel #.
+
+        numItems = 4
+        chunkSize = 1
+        # channels = 3
+
+        Data:
+        1 2 3 1 2 3 1 2 3 1 2 3
+
+        numItems = 4
+        chunkSize = 2
+        # channels = 3
+
+        Data:
+        1 1 2 2 3 3 1 1 2 2 3 3
+
+        numItmes = 4
+        chunkSize = 4
+        # channels = 3
+
+        Data:
+        1 1 1 1 2 2 2 2 3 3 3 3
 
         @param data The data.
         @param sampleNumbers  Array of sample numbers (integers). Same length as numItems.
@@ -53,7 +79,8 @@ class PLUGIN_API DataBuffer
         @param eventCodes Array of event codes. Same length as numItems.
         @param numItems Total number of samples per channel.
         @param chunkSize Number of consecutive samples per channel per chunk.
-        1 by default. Typically 1 or numItems.
+        1 by default. Typically, 1 or numItems.
+        Must be a multiple of numItems.
 
         @return The number of items actually written. May be less than numItems if
         the buffer doesn't have space.