Move processing into a serial queue.

Deinterleave input audio. Working version (still unhappy about input/output formats).
gardner-lab · Nov 3, 2015 · f1fb3e9 · f1fb3e9
1 parent 5d1bd5f
commit f1fb3e9
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 37 deletions.
diff --git a/SyllableDetector/AudioInterface.swift b/SyllableDetector/AudioInterface.swift
@@ -8,6 +8,7 @@
 
 import Foundation
 import AudioToolbox
+import Accelerate
 
 func renderOutput(inRefCon:UnsafeMutablePointer<Void>, actionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>, timeStamp: UnsafePointer<AudioTimeStamp>, busNumber: UInt32, frameCount: UInt32, data: UnsafeMutablePointer<AudioBufferList>) -> OSStatus {
 
@@ -65,12 +66,27 @@ func processInput(inRefCon:UnsafeMutablePointer<Void>, actionFlags: UnsafeMutabl
     // data
     let data = UnsafeMutablePointer<Float>(buffer.mData)
 
-    // receive audio
-    let frameLength = Int(frameCount) // number of floats
+    // number of channels
     let maxi = Int(aii.inputFormat.mChannelsPerFrame)
+
+    // number of floats per channel
+    let frameLength = Int(frameCount)
+
+    // single channel? no interleaving
+    if maxi == 1 {
+        aii.delegate?.receiveAudioFrom(aii, fromChannel: 0, withData: data, ofLength: frameLength)
+        return 0
+    }
+
+    // multiple channels? de-interleave
+    var zero: Float = 0.0
     for var i = 0; i < maxi; ++i {
-        // for each channel
-        aii.delegate?.receiveAudioFrom(aii, fromChannel: i, withData: data + (i * frameLength), ofLength: frameLength)
+        // use vDSP to deinterleave
+        vDSP_vsadd(data + i, vDSP_Stride(maxi), &zero, aii.buffer2, 1, vDSP_Length(frameLength))
+
+
+        // call delegate
+        aii.delegate?.receiveAudioFrom(aii, fromChannel: i, withData: aii.buffer2, ofLength: frameLength)
     }
 
     return 0
@@ -238,7 +254,7 @@ class AudioOutputInterface: AudioInterface
     let deviceID: AudioDeviceID
     let frameSize: Int
 
-    var outputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription()
+    var outputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription() // format of the actual audio hardware
 
     var outputHighFor = [Int]()
 
@@ -278,12 +294,16 @@ class AudioOutputInterface: AudioInterface
         try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output, outputBus, &outputFormat, &size))
 
         // print format information for debugging
-        assert(outputFormat.mFormatID == kAudioFormatLinearPCM)
-        assert(0 < (outputFormat.mFormatFlags & kAudioFormatFlagsNativeFloatPacked))
-        assert(0 == (outputFormat.mFormatFlags & kAudioFormatFlagIsNonInterleaved))
-        assert(1 == outputFormat.mFramesPerPacket)
-        assert(2 == outputFormat.mChannelsPerFrame)
-        assert(8 == outputFormat.mBytesPerFrame)
+        DLog("OUT \(outputFormat)")
+
+        // check for expected format
+        guard outputFormat.mFormatID == kAudioFormatLinearPCM && outputFormat.mFramesPerPacket == 1 && outputFormat.mFormatFlags == kAudioFormatFlagsNativeFloatPacked else {
+            throw AudioInterfaceError.UnsupportedFormat
+        }
+
+        // set the audio format
+        //size = UInt32(sizeof(AudioStreamBasicDescription))
+        //try checkError(AudioUnitSetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, outputBus, &outputFormat, size))
 
         // initiate output array
         outputHighFor = [Int](count: Int(outputFormat.mChannelsPerFrame), repeatedValue: 0)
@@ -349,6 +369,7 @@ class AudioInputInterface: AudioInterface
 
     var inputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription()
     var buffer = UnsafeMutablePointer<Float>()
+    var buffer2 = UnsafeMutablePointer<Float>() // used for de-interleaving data
     var bufferLen: Int = 0
 
     init(deviceID: AudioDeviceID, frameSize: Int = 64) {
@@ -398,9 +419,12 @@ class AudioInputInterface: AudioInterface
         try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, inputBus, &inputFormat, &size))
 
         // print format information for debugging
-        assert(inputFormat.mFormatID == kAudioFormatLinearPCM)
-        assert(0 < (inputFormat.mFormatFlags & kAudioFormatFlagsNativeFloatPacked))
-        assert(1 == inputFormat.mFramesPerPacket)
+        DLog("IN \(inputFormat)")
+
+        // check for expected format
+        guard inputFormat.mFormatID == kAudioFormatLinearPCM && inputFormat.mFramesPerPacket == 1 && inputFormat.mFormatFlags == kAudioFormatFlagsNativeFloatPacked else {
+            throw AudioInterfaceError.UnsupportedFormat
+        }
 
         // set the audio format
         size = UInt32(sizeof(AudioStreamBasicDescription))
@@ -411,9 +435,10 @@ class AudioInputInterface: AudioInterface
         size = UInt32(sizeof(UInt32))
         try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_MaximumFramesPerSlice, kAudioUnitScope_Global, 0, &maxFrameSize, &size))
 
-        // create buffer
+        // create buffers
         bufferLen = Int(maxFrameSize * inputFormat.mBytesPerPacket)
         buffer = UnsafeMutablePointer<Float>.alloc(bufferLen)
+        buffer2 = UnsafeMutablePointer<Float>.alloc(bufferLen)
 
         // set frame size
         var frameSize: UInt32 = UInt32(self.frameSize)

diff --git a/SyllableDetector/Base.lproj/Main.storyboard b/SyllableDetector/Base.lproj/Main.storyboard
@@ -819,7 +819,7 @@ DQ
                                             <color key="backgroundColor" name="controlBackgroundColor" catalog="System" colorSpace="catalog"/>
                                             <color key="gridColor" name="gridColor" catalog="System" colorSpace="catalog"/>
                                             <tableColumns>
-                                                <tableColumn identifier="ColumnInput" editable="NO" width="91" minWidth="40" maxWidth="1000" id="63y-z6-tY7">
+                                                <tableColumn identifier="ColumnInput" editable="NO" width="40" minWidth="40" maxWidth="1000" id="63y-z6-tY7">
                                                     <tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" title="Input">
                                                         <font key="font" metaFont="smallSystem"/>
                                                         <color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
@@ -838,11 +838,11 @@ DQ
                                                         <color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
                                                         <color key="backgroundColor" name="headerColor" catalog="System" colorSpace="catalog"/>
                                                     </tableHeaderCell>
-                                                    <levelIndicatorCell key="dataCell" state="on" alignment="left" doubleValue="3" maxValue="3" warningValue="3" criticalValue="3" id="Y9G-T6-pfa">
+                                                    <levelIndicatorCell key="dataCell" state="on" alignment="left" maxValue="100" warningValue="80" criticalValue="90" levelIndicatorStyle="continuousCapacity" id="Y9G-T6-pfa">
                                                         <font key="font" metaFont="system"/>
                                                     </levelIndicatorCell>
                                                 </tableColumn>
-                                                <tableColumn identifier="ColumnNetwork" editable="NO" width="160" minWidth="40" maxWidth="1000" id="0ZF-2t-rye">
+                                                <tableColumn identifier="ColumnNetwork" editable="NO" width="90" minWidth="40" maxWidth="1000" id="0ZF-2t-rye">
                                                     <tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" alignment="left" title="Neural Network">
                                                         <font key="font" metaFont="smallSystem"/>
                                                         <color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
@@ -855,7 +855,7 @@ DQ
                                                     </textFieldCell>
                                                     <tableColumnResizingMask key="resizingMask" resizeWithTable="YES"/>
                                                 </tableColumn>
-                                                <tableColumn identifier="ColumnOutput" editable="NO" width="92" minWidth="40" maxWidth="1000" id="KJj-w1-YTd">
+                                                <tableColumn identifier="ColumnOutput" editable="NO" width="45" minWidth="40" maxWidth="1000" id="KJj-w1-YTd">
                                                     <tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" alignment="left" title="Output">
                                                         <font key="font" metaFont="smallSystem"/>
                                                         <color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
@@ -874,7 +874,7 @@ DQ
                                                         <color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
                                                         <color key="backgroundColor" white="0.0" alpha="0.0" colorSpace="calibratedWhite"/>
                                                     </tableHeaderCell>
-                                                    <levelIndicatorCell key="dataCell" state="on" alignment="left" doubleValue="3" maxValue="3" warningValue="3" criticalValue="3" id="5OQ-nD-jPx">
+                                                    <levelIndicatorCell key="dataCell" state="on" alignment="left" maxValue="100" warningValue="50" criticalValue="50" levelIndicatorStyle="continuousCapacity" id="5OQ-nD-jPx">
                                                         <font key="font" metaFont="system"/>
                                                     </levelIndicatorCell>
                                                 </tableColumn>

diff --git a/SyllableDetector/CircularShortTimeFourierTransform.swift b/SyllableDetector/CircularShortTimeFourierTransform.swift
@@ -161,6 +161,22 @@ class CircularShortTimeFourierTransform
         }
     }
 
+    /// Copies one channel out of an interleaved sample buffer and appends it to the circular buffer.
+    ///
+    /// - parameter data: Start of the interleaved samples.
+    /// - parameter numSamples: Number of samples to append for the selected channel.
+    /// - parameter channel: Offset of the wanted channel within each interleaved frame.
+    /// - parameter totalChannels: Stride between consecutive samples of the same channel.
+    ///
+    /// Terminates with `fatalError` when the circular buffer has too little free space.
+    func appendInterleavedData(data: UnsafeMutablePointer<Float>, withSamples numSamples: Int, fromChannel channel: Int, ofTotalChannels totalChannels: Int) {
+        // nothing to append
+        guard 0 < numSamples else { return }
+
+        // TPCircularBuffer reports free space and consumes "produce" amounts in BYTES, not samples
+        let numBytes = numSamples * sizeof(Float)
+
+        // get head of circular buffer
+        var space: Int32 = 0
+        let head = TPCircularBufferHead(&self.buffer, &space)
+        if Int(space) < numBytes {
+            fatalError("Insufficient space on buffer.")
+        }
+
+        // use vDSP to perform copy with stride (adding zero leaves the samples unchanged)
+        var zero: Float = 0.0
+        vDSP_vsadd(data + channel, vDSP_Stride(totalChannels), &zero, UnsafeMutablePointer<Float>(head), 1, vDSP_Length(numSamples))
+
+        // move head forward by the number of bytes actually written
+        TPCircularBufferProduce(&self.buffer, Int32(numBytes))
+    }
+
     // TODO: write better functions that can help avoid double copying
 
     func extractMagnitude() -> [Float]? {

diff --git a/SyllableDetector/ViewControllerProcessor.swift b/SyllableDetector/ViewControllerProcessor.swift
@@ -34,6 +34,9 @@ class Processor: AudioInputInterfaceDelegate {
     // high duration
     let highDuration = 0.001 // 1ms
 
+    // dispatch queue
+    let queueProcessing: dispatch_queue_t
+
     init(deviceInput: AudioInterface.AudioDevice, deviceOutput: AudioInterface.AudioDevice, entries: [ProcessorEntry]) throws {
         // setup processor entries
         self.entries = entries.filter {
@@ -56,22 +59,29 @@ class Processor: AudioInputInterfaceDelegate {
         interfaceInput = AudioInputInterface(deviceID: deviceInput.deviceID)
         interfaceOutput = AudioOutputInterface(deviceID: deviceOutput.deviceID)
 
+        // create queue
+        queueProcessing = dispatch_queue_create("ProcessorQueue", DISPATCH_QUEUE_SERIAL)
+
         // set self as delegate
         interfaceInput.delegate = self
 
         try interfaceOutput.initializeAudio()
         try interfaceInput.initializeAudio()
+
+        // check sampling rates
+        for d in self.detectors {
+            if (1 < abs(d.config.samplingRate - interfaceInput.inputFormat.mSampleRate)) {
+                DLog("Mismatched sampling rates.")
+            }
+        }
     }
 
     deinit {
-        DLog("stop")
         interfaceInput.tearDownAudio()
         interfaceOutput.tearDownAudio()
     }
 
     func receiveAudioFrom(interface: AudioInputInterface, fromChannel channel: Int, withData data: UnsafeMutablePointer<Float>, ofLength length: Int) {
-        DLog("\(channel) \(data[0])")
-
         // valid channel
         guard channel < channels.count else { return }
 
@@ -82,13 +92,29 @@ class Processor: AudioInputInterfaceDelegate {
         // append audio samples
         detectors[index].appendAudioData(data, withSamples: length)
 
-        if detectors[channel].seenSyllable() {
-            DLog("play") // for debugging
-
-            // play high
-            interfaceOutput.createHighOutput(entries[index].outputChannel, forDuration: highDuration)
+        // process
+        dispatch_async(queueProcessing) {
+            if self.detectors[index].seenSyllable() {
+                // record playing
+                DLog("\(channel) play")
+
+                // play high
+                self.interfaceOutput.createHighOutput(self.entries[index].outputChannel, forDuration: self.highDuration)
+            }
         }
     }
+
+    /// Returns the latest output value of the detector mapped to an input channel.
+    ///
+    /// - parameter channel: Channel number used to index `channels`.
+    /// - returns: `lastOutput` of the mapped detector, or `nil` when `channel` is out of
+    ///   range or its entry in `channels` is negative (no detector mapped).
+    func getOutputForChannel(channel: Int) -> Float? {
+        // valid channel (a negative value would trap on the subscript below)
+        guard 0 <= channel && channel < channels.count else { return nil }
+
+        // get index
+        let index = channels[channel]
+        guard index >= 0 else { return nil }
+
+        // TODO: replace with maximum value since last call
+        return detectors[index].lastOutput
+    }
 }
 
 class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableViewDataSource {
@@ -103,11 +129,26 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
     var processorEntries = [ProcessorEntry]()
     var processor: Processor?
 
+    // timer to redraw interface (saves time)
+    var timerRedraw: NSTimer?
+
     var isRunning = false {
         didSet {
+            if oldValue == isRunning { return } // unchanged value: skip, so a repeated `true` never schedules a second timer
+
+            // update interface: table and load button are disabled while running
             tableChannels.enabled = !isRunning
             buttonLoad.enabled = !isRunning
             buttonToggle.title = (isRunning ? "Stop" : "Start")
+
+            // start or stop timer (sends timerUpdateValues: to self every 0.1 s while running)
+            if isRunning {
+                timerRedraw = NSTimer.scheduledTimerWithTimeInterval(0.1, target: self, selector: "timerUpdateValues:", userInfo: nil, repeats: true)
+            }
+            else {
+                timerRedraw?.invalidate()
+                timerRedraw = nil
+            }
         }
     }
 
@@ -129,6 +170,14 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
         tableChannels.reloadData()
     }
 
+    override func viewWillDisappear() {
+        // clear processor (drops this controller's reference; Processor.deinit tears down the audio interfaces once it is released)
+        processor = nil
+        isRunning = false // didSet invalidates the redraw timer and re-enables the controls
+
+        super.viewWillDisappear()
+    }
+
     func setupEntries(input deviceInput: AudioInterface.AudioDevice, output deviceOutput: AudioInterface.AudioDevice) {
         // store input and output
         self.deviceInput = deviceInput
@@ -186,14 +235,6 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
         }
     }
 
-    override func viewWillDisappear() {
-        // clear processor
-        processor = nil
-        isRunning = false
-
-        super.viewWillDisappear()
-    }
-
     func numberOfRowsInTableView(tableView: NSTableView) -> Int {
         let inputChannels: Int, outputChannels: Int
 
@@ -224,7 +265,12 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
 
         switch identifier {
         case "ColumnInput", "ColumnOutput": return "Channel \(row + 1)"
-        case "ColumnInLevel", "ColumnOutLevel": return NSNumber(float: 0.0)
+        case "ColumnInLevel": return NSNumber(float: 0.0)
+        case "ColumnOutLevel":
+            if let p = processor {
+                return NSNumber(float: 100.0 * (p.getOutputForChannel(row) ?? 0.0))
+            }
+            return NSNumber(float: 0.00)
         case "ColumnNetwork": return nil == processorEntries[row].config ? "Not Selected" : processorEntries[row].network
         default: return nil
         }
@@ -302,5 +348,14 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
         // show
         panel.beginSheetModalForWindow(self.view.window!, completionHandler: cb)
     }
+
+    /// Timer callback that redraws the level columns of the channel table.
+    ///
+    /// - parameter timer: The timer that fired; not used.
+    func timerUpdateValues(timer: NSTimer!) {
+        // resolve the level columns by identifier rather than by fixed position,
+        // so the right cells are refreshed even when the columns have been reordered
+        let indexes = NSMutableIndexSet()
+        for identifier in ["ColumnInLevel", "ColumnOutLevel"] {
+            // columnWithIdentifier returns -1 when no such column exists
+            let column = tableChannels.columnWithIdentifier(identifier)
+            if 0 <= column {
+                indexes.addIndex(column)
+            }
+        }
+
+        // reload data
+        let rows = NSIndexSet(indexesInRange: NSRange(location: 0, length: processorEntries.count))
+        tableChannels.reloadDataForRowIndexes(rows, columnIndexes: indexes)
+    }
 }