Skip to content

Commit

Permalink
Move processing into a serial queue.
Browse files Browse the repository at this point in the history
Deinterleave input audio.
Working version (still unhappy about input/output formats).
  • Loading branch information
nathanntg committed Nov 3, 2015
1 parent 5d1bd5f commit f1fb3e9
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 37 deletions.
55 changes: 40 additions & 15 deletions SyllableDetector/AudioInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import Foundation
import AudioToolbox
import Accelerate

func renderOutput(inRefCon:UnsafeMutablePointer<Void>, actionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>, timeStamp: UnsafePointer<AudioTimeStamp>, busNumber: UInt32, frameCount: UInt32, data: UnsafeMutablePointer<AudioBufferList>) -> OSStatus {

Expand Down Expand Up @@ -65,12 +66,27 @@ func processInput(inRefCon:UnsafeMutablePointer<Void>, actionFlags: UnsafeMutabl
// data
let data = UnsafeMutablePointer<Float>(buffer.mData)

// receive audio
let frameLength = Int(frameCount) // number of floats
// number of channels
let maxi = Int(aii.inputFormat.mChannelsPerFrame)

// number of floats per channel
let frameLength = Int(frameCount)

// single channel? no interleaving
if maxi == 1 {
aii.delegate?.receiveAudioFrom(aii, fromChannel: 0, withData: data, ofLength: frameLength)
return 0
}

// multiple channels? de-interleave
var zero: Float = 0.0
for var i = 0; i < maxi; ++i {
// for each channel
aii.delegate?.receiveAudioFrom(aii, fromChannel: i, withData: data + (i * frameLength), ofLength: frameLength)
// use vDSP to deinterleave
vDSP_vsadd(data + i, vDSP_Stride(maxi), &zero, aii.buffer2, 1, vDSP_Length(frameLength))


// call delegate
aii.delegate?.receiveAudioFrom(aii, fromChannel: i, withData: aii.buffer2, ofLength: frameLength)
}

return 0
Expand Down Expand Up @@ -238,7 +254,7 @@ class AudioOutputInterface: AudioInterface
let deviceID: AudioDeviceID
let frameSize: Int

var outputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription()
var outputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription() // format of the actual audio hardware

var outputHighFor = [Int]()

Expand Down Expand Up @@ -278,12 +294,16 @@ class AudioOutputInterface: AudioInterface
try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output, outputBus, &outputFormat, &size))

// print format information for debugging
assert(outputFormat.mFormatID == kAudioFormatLinearPCM)
assert(0 < (outputFormat.mFormatFlags & kAudioFormatFlagsNativeFloatPacked))
assert(0 == (outputFormat.mFormatFlags & kAudioFormatFlagIsNonInterleaved))
assert(1 == outputFormat.mFramesPerPacket)
assert(2 == outputFormat.mChannelsPerFrame)
assert(8 == outputFormat.mBytesPerFrame)
DLog("OUT \(outputFormat)")

// check for expected format
guard outputFormat.mFormatID == kAudioFormatLinearPCM && outputFormat.mFramesPerPacket == 1 && outputFormat.mFormatFlags == kAudioFormatFlagsNativeFloatPacked else {
throw AudioInterfaceError.UnsupportedFormat
}

// set the audio format
//size = UInt32(sizeof(AudioStreamBasicDescription))
//try checkError(AudioUnitSetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, outputBus, &outputFormat, size))

// initiate output array
outputHighFor = [Int](count: Int(outputFormat.mChannelsPerFrame), repeatedValue: 0)
Expand Down Expand Up @@ -349,6 +369,7 @@ class AudioInputInterface: AudioInterface

var inputFormat: AudioStreamBasicDescription = AudioStreamBasicDescription()
var buffer = UnsafeMutablePointer<Float>()
var buffer2 = UnsafeMutablePointer<Float>() // used for de-interleaving data
var bufferLen: Int = 0

init(deviceID: AudioDeviceID, frameSize: Int = 64) {
Expand Down Expand Up @@ -398,9 +419,12 @@ class AudioInputInterface: AudioInterface
try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, inputBus, &inputFormat, &size))

// print format information for debugging
assert(inputFormat.mFormatID == kAudioFormatLinearPCM)
assert(0 < (inputFormat.mFormatFlags & kAudioFormatFlagsNativeFloatPacked))
assert(1 == inputFormat.mFramesPerPacket)
DLog("IN \(inputFormat)")

// check for expected format
guard inputFormat.mFormatID == kAudioFormatLinearPCM && inputFormat.mFramesPerPacket == 1 && inputFormat.mFormatFlags == kAudioFormatFlagsNativeFloatPacked else {
throw AudioInterfaceError.UnsupportedFormat
}

// set the audio format
size = UInt32(sizeof(AudioStreamBasicDescription))
Expand All @@ -411,9 +435,10 @@ class AudioInputInterface: AudioInterface
size = UInt32(sizeof(UInt32))
try checkError(AudioUnitGetProperty(audioUnit, kAudioUnitProperty_MaximumFramesPerSlice, kAudioUnitScope_Global, 0, &maxFrameSize, &size))

// create buffer
// create buffers
bufferLen = Int(maxFrameSize * inputFormat.mBytesPerPacket)
buffer = UnsafeMutablePointer<Float>.alloc(bufferLen)
buffer2 = UnsafeMutablePointer<Float>.alloc(bufferLen)

// set frame size
var frameSize: UInt32 = UInt32(self.frameSize)
Expand Down
10 changes: 5 additions & 5 deletions SyllableDetector/Base.lproj/Main.storyboard
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ DQ
<color key="backgroundColor" name="controlBackgroundColor" catalog="System" colorSpace="catalog"/>
<color key="gridColor" name="gridColor" catalog="System" colorSpace="catalog"/>
<tableColumns>
<tableColumn identifier="ColumnInput" editable="NO" width="91" minWidth="40" maxWidth="1000" id="63y-z6-tY7">
<tableColumn identifier="ColumnInput" editable="NO" width="40" minWidth="40" maxWidth="1000" id="63y-z6-tY7">
<tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" title="Input">
<font key="font" metaFont="smallSystem"/>
<color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
Expand All @@ -838,11 +838,11 @@ DQ
<color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
<color key="backgroundColor" name="headerColor" catalog="System" colorSpace="catalog"/>
</tableHeaderCell>
<levelIndicatorCell key="dataCell" state="on" alignment="left" doubleValue="3" maxValue="3" warningValue="3" criticalValue="3" id="Y9G-T6-pfa">
<levelIndicatorCell key="dataCell" state="on" alignment="left" maxValue="100" warningValue="80" criticalValue="90" levelIndicatorStyle="continuousCapacity" id="Y9G-T6-pfa">
<font key="font" metaFont="system"/>
</levelIndicatorCell>
</tableColumn>
<tableColumn identifier="ColumnNetwork" editable="NO" width="160" minWidth="40" maxWidth="1000" id="0ZF-2t-rye">
<tableColumn identifier="ColumnNetwork" editable="NO" width="90" minWidth="40" maxWidth="1000" id="0ZF-2t-rye">
<tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" alignment="left" title="Neural Network">
<font key="font" metaFont="smallSystem"/>
<color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
Expand All @@ -855,7 +855,7 @@ DQ
</textFieldCell>
<tableColumnResizingMask key="resizingMask" resizeWithTable="YES"/>
</tableColumn>
<tableColumn identifier="ColumnOutput" editable="NO" width="92" minWidth="40" maxWidth="1000" id="KJj-w1-YTd">
<tableColumn identifier="ColumnOutput" editable="NO" width="45" minWidth="40" maxWidth="1000" id="KJj-w1-YTd">
<tableHeaderCell key="headerCell" lineBreakMode="truncatingTail" borderStyle="border" alignment="left" title="Output">
<font key="font" metaFont="smallSystem"/>
<color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
Expand All @@ -874,7 +874,7 @@ DQ
<color key="textColor" name="headerTextColor" catalog="System" colorSpace="catalog"/>
<color key="backgroundColor" white="0.0" alpha="0.0" colorSpace="calibratedWhite"/>
</tableHeaderCell>
<levelIndicatorCell key="dataCell" state="on" alignment="left" doubleValue="3" maxValue="3" warningValue="3" criticalValue="3" id="5OQ-nD-jPx">
<levelIndicatorCell key="dataCell" state="on" alignment="left" maxValue="100" warningValue="50" criticalValue="50" levelIndicatorStyle="continuousCapacity" id="5OQ-nD-jPx">
<font key="font" metaFont="system"/>
</levelIndicatorCell>
</tableColumn>
Expand Down
16 changes: 16 additions & 0 deletions SyllableDetector/CircularShortTimeFourierTransform.swift
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,22 @@ class CircularShortTimeFourierTransform
}
}

/// Copies one channel out of an interleaved sample buffer onto the head of the circular buffer.
///
/// - parameter data: Interleaved samples; the sample for `channel` in frame `k` is read from `data[channel + k * totalChannels]`.
/// - parameter numSamples: Number of samples (frames) to copy for the channel.
/// - parameter channel: Zero-based offset of the channel within each frame.
/// - parameter totalChannels: Number of interleaved channels, used as the read stride.
///
/// Terminates with `fatalError` when the circular buffer cannot hold `numSamples` floats.
func appendInterleavedData(data: UnsafeMutablePointer<Float>, withSamples numSamples: Int, fromChannel channel: Int, ofTotalChannels totalChannels: Int) {
    // TPCircularBuffer reports free space and accepts produced amounts in BYTES, not samples
    let numBytes = numSamples * sizeof(Float)

    // get head of circular buffer
    var space: Int32 = 0
    let head = TPCircularBufferHead(&self.buffer, &space)
    if Int(space) < numBytes {
        fatalError("Insufficient space on buffer.")
    }

    // use vDSP to perform copy with stride (adding zero leaves the samples unchanged)
    var zero: Float = 0.0
    vDSP_vsadd(data + channel, vDSP_Stride(totalChannels), &zero, UnsafeMutablePointer<Float>(head), 1, vDSP_Length(numSamples))

    // move head forward by the number of bytes just written
    TPCircularBufferProduce(&self.buffer, Int32(numBytes))
}

// TODO: write better functions that can help avoid double copying

func extractMagnitude() -> [Float]? {
Expand Down
89 changes: 72 additions & 17 deletions SyllableDetector/ViewControllerProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class Processor: AudioInputInterfaceDelegate {
// high duration
let highDuration = 0.001 // 1ms

// dispatch queue
let queueProcessing: dispatch_queue_t

init(deviceInput: AudioInterface.AudioDevice, deviceOutput: AudioInterface.AudioDevice, entries: [ProcessorEntry]) throws {
// setup processor entries
self.entries = entries.filter {
Expand All @@ -56,22 +59,29 @@ class Processor: AudioInputInterfaceDelegate {
interfaceInput = AudioInputInterface(deviceID: deviceInput.deviceID)
interfaceOutput = AudioOutputInterface(deviceID: deviceOutput.deviceID)

// create queue
queueProcessing = dispatch_queue_create("ProcessorQueue", DISPATCH_QUEUE_SERIAL)

// set self as delegate
interfaceInput.delegate = self

try interfaceOutput.initializeAudio()
try interfaceInput.initializeAudio()

// check sampling rates
for d in self.detectors {
if (1 < abs(d.config.samplingRate - interfaceInput.inputFormat.mSampleRate)) {
DLog("Mismatched sampling rates.")
}
}
}

// on release: log, then tear down the input interface followed by the output interface
deinit {
DLog("stop")
interfaceInput.tearDownAudio()
interfaceOutput.tearDownAudio()
}

func receiveAudioFrom(interface: AudioInputInterface, fromChannel channel: Int, withData data: UnsafeMutablePointer<Float>, ofLength length: Int) {
DLog("\(channel) \(data[0])")

// valid channel
guard channel < channels.count else { return }

Expand All @@ -82,13 +92,29 @@ class Processor: AudioInputInterfaceDelegate {
// append audio samples
detectors[index].appendAudioData(data, withSamples: length)

if detectors[channel].seenSyllable() {
DLog("play") // for debugging

// play high
interfaceOutput.createHighOutput(entries[index].outputChannel, forDuration: highDuration)
// process
dispatch_async(queueProcessing) {
if self.detectors[index].seenSyllable() {
// record playing
DLog("\(channel) play")

// play high
self.interfaceOutput.createHighOutput(self.entries[index].outputChannel, forDuration: self.highDuration)
}
}
}

/// Looks up the most recent detector output for an input channel.
///
/// - parameter channel: Input channel number, used as an index into `channels`.
/// - returns: `lastOutput` of the mapped detector, or `nil` when `channel` is past the end of `channels` or maps to a negative index.
func getOutputForChannel(channel: Int) -> Float? {
    // channel lies beyond the mapping table
    if channel >= channels.count {
        return nil
    }

    // a negative mapping means there is nothing to report
    let detectorIndex = channels[channel]
    if detectorIndex < 0 {
        return nil
    }

    // TODO: replace with maximum value since last call
    return detectors[detectorIndex].lastOutput
}
}

class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableViewDataSource {
Expand All @@ -103,11 +129,26 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
var processorEntries = [ProcessorEntry]()
var processor: Processor?

// timer to redraw interface (saves time)
var timerRedraw: NSTimer?

// running state; a change updates the controls and starts or stops the redraw timer
var isRunning = false {
    didSet {
        // nothing to do unless the value actually changed
        guard oldValue != isRunning else { return }

        // table and load button are only enabled while stopped
        let stopped = !isRunning
        tableChannels.enabled = stopped
        buttonLoad.enabled = stopped

        if isRunning {
            buttonToggle.title = "Stop"

            // start periodic redraw
            timerRedraw = NSTimer.scheduledTimerWithTimeInterval(0.1, target: self, selector: "timerUpdateValues:", userInfo: nil, repeats: true)
        }
        else {
            buttonToggle.title = "Start"

            // stop periodic redraw
            timerRedraw?.invalidate()
            timerRedraw = nil
        }
    }
}

Expand All @@ -129,6 +170,14 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
tableChannels.reloadData()
}

// when the view is about to go away, drop the processor and mark the controller as stopped
override func viewWillDisappear() {
// clear processor
processor = nil
// if this changes the value, the isRunning observer re-enables the controls and invalidates the redraw timer
isRunning = false

super.viewWillDisappear()
}

func setupEntries(input deviceInput: AudioInterface.AudioDevice, output deviceOutput: AudioInterface.AudioDevice) {
// store input and output
self.deviceInput = deviceInput
Expand Down Expand Up @@ -186,14 +235,6 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
}
}

override func viewWillDisappear() {
// clear processor
processor = nil
isRunning = false

super.viewWillDisappear()
}

func numberOfRowsInTableView(tableView: NSTableView) -> Int {
let inputChannels: Int, outputChannels: Int

Expand Down Expand Up @@ -224,7 +265,12 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie

switch identifier {
case "ColumnInput", "ColumnOutput": return "Channel \(row + 1)"
case "ColumnInLevel", "ColumnOutLevel": return NSNumber(float: 0.0)
case "ColumnInLevel": return NSNumber(float: 0.0)
case "ColumnOutLevel":
if let p = processor {
return NSNumber(float: 100.0 * (p.getOutputForChannel(row) ?? 0.0))
}
return NSNumber(float: 0.00)
case "ColumnNetwork": return nil == processorEntries[row].config ? "Not Selected" : processorEntries[row].network
default: return nil
}
Expand Down Expand Up @@ -302,5 +348,14 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
// show
panel.beginSheetModalForWindow(self.view.window!, completionHandler: cb)
}

/// Timer callback that reloads table columns 1 and 4 for one row per processor entry.
///
/// - parameter timer: The firing timer (unused).
func timerUpdateValues(timer: NSTimer!) {
    // columns to refresh (presumably the input and output level columns; confirm against the storyboard)
    let levelColumns = NSMutableIndexSet()
    levelColumns.addIndex(1)
    levelColumns.addIndex(4)

    // rows 0 ..< processorEntries.count
    let entryRows = NSIndexSet(indexesInRange: NSMakeRange(0, processorEntries.count))

    // reload data
    tableChannels.reloadDataForRowIndexes(entryRows, columnIndexes: levelColumns)
}
}

0 comments on commit f1fb3e9

Please sign in to comment.