Skip to content

Commit

Permalink
Merge pull request #12 from nathanntg/resample
Browse files Browse the repository at this point in the history
Resample audio rather than adjusting parameters
  • Loading branch information
nathanntg committed Jan 5, 2016
2 parents a450b03 + 086b1ab commit 5507db0
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 28 deletions.
4 changes: 4 additions & 0 deletions SyllableDetector.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
D8625C9C1BE149FE000922D8 /* CircularShortTimeFourierTransform.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */; };
D8625CA41BE14A53000922D8 /* TPCircularBuffer.c in Sources */ = {isa = PBXBuildFile; fileRef = D8625CA21BE14A53000922D8 /* TPCircularBuffer.c */; };
D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89269881BF3AEDE009482C2 /* SummaryStat.swift */; };
D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89803B41C3C2ACF000CFC41 /* Resampler.swift */; };
D8B59D161BE6B77F0099CB4B /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8B59D151BE6B77F0099CB4B /* StreamReader.swift */; };
D8D1BD481BE6D61B00059974 /* ViewControllerMenu.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD471BE6D61B00059974 /* ViewControllerMenu.swift */; };
D8D1BD4D1BE6E53C00059974 /* WindowControllerProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */; };
Expand All @@ -43,6 +44,7 @@
D8625CA31BE14A53000922D8 /* TPCircularBuffer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TPCircularBuffer.h; sourceTree = "<group>"; };
D8625CA51BE14B8E000922D8 /* SyllableDetector-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "SyllableDetector-Bridging-Header.h"; sourceTree = "<group>"; };
D89269881BF3AEDE009482C2 /* SummaryStat.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SummaryStat.swift; sourceTree = "<group>"; };
D89803B41C3C2ACF000CFC41 /* Resampler.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Resampler.swift; sourceTree = "<group>"; };
D8B59D151BE6B77F0099CB4B /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; };
D8D1BD471BE6D61B00059974 /* ViewControllerMenu.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewControllerMenu.swift; sourceTree = "<group>"; };
D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowControllerProcessor.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -84,6 +86,7 @@
D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */,
D8625C911BE149FE000922D8 /* Common.swift */,
D8625C8E1BE149FE000922D8 /* NeuralNet.swift */,
D89803B41C3C2ACF000CFC41 /* Resampler.swift */,
D8B59D151BE6B77F0099CB4B /* StreamReader.swift */,
D89269881BF3AEDE009482C2 /* SummaryStat.swift */,
D8625C8B1BE149FE000922D8 /* SyllableDetector.swift */,
Expand Down Expand Up @@ -191,6 +194,7 @@
D8625C9A1BE149FE000922D8 /* Common.swift in Sources */,
D82E7F7B1C10B629004759F7 /* ViewControllerSimulator.swift in Sources */,
D8625C7F1BE14922000922D8 /* ViewControllerProcessor.swift in Sources */,
D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */,
D8625C951BE149FE000922D8 /* AudioInterface.swift in Sources */,
D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */,
D8625C7D1BE14922000922D8 /* AppDelegate.swift in Sources */,
Expand Down
81 changes: 81 additions & 0 deletions SyllableDetector/Resampler.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//
// Resampler.swift
// SyllableDetector
//
// Created by Nathan Perkins on 1/5/16.
// Copyright © 2016 Gardner Lab. All rights reserved.
//

import Foundation
import Accelerate

/// A sample rate converter operating on buffers of single-precision audio samples.
///
/// NOTE(review): the only conformer visible here (`ResamplerLinear`) keeps state between calls so
/// that consecutive buffers are treated as one stream; presumably every conformer is expected to
/// behave that way, but confirm before relying on it.
protocol Resampler {
/// Resamples `numSamples` values read from `data` and returns the converted samples.
///
/// - parameter data: Pointer to the first input sample.
/// - parameter numSamples: Number of samples readable from `data`.
/// - returns: The resampled audio as a newly created array.
func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int) -> [Float]
// Possible future variant that writes into a caller supplied buffer instead of allocating an array:
// func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int
}

// A higher quality alternative would be libsamplerate ("Secret Rabbit Code"); see its converter list:
// http://www.mega-nerd.com/SRC/api_misc.html#Converters

/// Streaming sample rate converter based on linear interpolation. Terrible quality, very fast.
///
/// Consecutive calls to `resampleVector` are treated as one continuous stream: the fractional read
/// position and the final sample of the previous buffer are remembered, so output positions stay
/// evenly spaced across buffer boundaries.
class ResamplerLinear: Resampler {
    let samplingRateIn: Double
    let samplingRateOut: Double

    /// Distance between two output samples, measured in input samples.
    private let step: Float

    /// Final sample of the previous buffer (conceptually located at index -1 of the current buffer).
    private var last: Float = 0.0 // used for interpolating across samples

    /// Read position of the next output sample relative to the start of the next buffer. Negative
    /// values lie between `last` and the first sample of the next buffer.
    private var offset: Float = 0.0

    /// Creates a resampler converting from `samplingRateIn` to `samplingRateOut`.
    ///
    /// - parameter samplingRateIn: Rate of the audio passed to `resampleVector`; must be positive.
    /// - parameter samplingRateOut: Rate of the audio returned; must be positive.
    init(fromRate samplingRateIn: Double, toRate samplingRateOut: Double) {
        // a zero or negative rate yields an unusable step (0, infinite or NaN)
        precondition(samplingRateIn > 0.0 && samplingRateOut > 0.0, "Sampling rates must be positive.")

        self.samplingRateIn = samplingRateIn
        self.samplingRateOut = samplingRateOut

        self.step = Float(samplingRateIn / samplingRateOut)
    }

    /// Resamples one buffer of the stream.
    ///
    /// - parameter data: Pointer to the first input sample.
    /// - parameter numSamplesIn: Number of samples readable from `data`.
    /// - returns: Every output sample whose read position lies at or before the final input sample
    ///   of this buffer; later positions are produced by the next call. May be empty.
    func resampleVector(data: UnsafePointer<Float>, ofLength numSamplesIn: Int) -> [Float] {
        // nothing to consume, leave the stream state untouched
        guard numSamplesIn > 0 else { return [] }

        let lastIndex = Float(numSamplesIn - 1)

        // next read position lies beyond this buffer (only possible for short buffers when
        // downsampling): consume the buffer without producing output
        guard offset <= lastIndex else {
            offset -= Float(numSamplesIn)
            last = data[numSamplesIn - 1]
            return []
        }

        // number of read positions (offset + k * step) located at or before the final input sample
        let numSamplesOut = Int((lastIndex - offset) / step) + 1

        // return list
        var ret = [Float](count: numSamplesOut, repeatedValue: 0.0)

        // fractional read position of each output sample
        var indices = [Float](count: numSamplesOut, repeatedValue: 0.0)
        var rampStart = offset, rampStep = step
        vDSP_vramp(&rampStart, &rampStep, &indices, 1, vDSP_Length(numSamplesOut))

        // leading positions before data[0]: interpolate between the previous buffer's final
        // sample and data[0] (there can be several when upsampling)
        var head = 0
        while head < numSamplesOut && indices[head] < 0.0 {
            // clamp guards against rounding pushing the position marginally below -1
            let weightLast = min(0.0 - indices[head], 1.0)
            ret[head] = (last * weightLast) + (data[0] * (1.0 - weightLast))
            head += 1
        }

        // trailing positions on (or, through rounding, marginally past) the final input sample:
        // vDSP_vlint would read data[numSamplesIn] for these, so copy the final sample instead
        var tail = numSamplesOut
        while tail > head && indices[tail - 1] >= lastIndex {
            tail -= 1
            ret[tail] = data[numSamplesIn - 1]
        }

        // remaining positions have an integer part in 0...(numSamplesIn - 2), as vDSP_vlint requires
        if head < tail {
            let numInterior = tail - head
            indices.withUnsafeBufferPointer { positions in
                ret.withUnsafeMutableBufferPointer { output in
                    vDSP_vlint(data, positions.baseAddress + head, 1, output.baseAddress + head, 1, vDSP_Length(numInterior), vDSP_Length(numSamplesIn))
                }
            }
        }

        // carry the read position over, expressed relative to the start of the next buffer
        offset = offset + (Float(numSamplesOut) * step) - Float(numSamplesIn)
        last = data[numSamplesIn - 1]

        return ret
    }

    /// Convenience wrapper around `resampleVector` for arrays; used for testing.
    func resampleArray(arr: [Float]) -> [Float] {
        return self.resampleVector(arr, ofLength: arr.count)
    }

    // func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int {
    //
    // }
}

26 changes: 2 additions & 24 deletions SyllableDetector/SyllableDetectorConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ struct SyllableDetectorConfig
}
}

var samplingRate: Double // eqv: samplerate
let samplingRate: Double // eqv: samplerate
let fourierLength: Int // eqv: FFT_SIZE
let windowLength: Int
var windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT))
let windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT))

let freqRange: (Double, Double) // eqv: freq_range
let timeRange: Int // eqv: time_window_steps = double(floor(time_window / timestep))
Expand All @@ -42,28 +42,6 @@ struct SyllableDetectorConfig
let threshold: Double // eqv: trigger threshold

let net: NeuralNet

mutating func modifySamplingRate(newSamplingRate: Double) {
// store old things
let oldSamplingRate = samplingRate
let oldWindowOverlap = windowOverlap

if oldSamplingRate == newSamplingRate { return }

// window offset (difference in time between two consecutive windows)
// calculated using old values, but should be constant even after the new values
let windowOffset = Double(windowLength - oldWindowOverlap) / oldSamplingRate

let newWindowOverlap = windowLength - Int(round(windowOffset * newSamplingRate))

// change the to new things
samplingRate = newSamplingRate
windowOverlap = newWindowOverlap

DLog("Window offset: \(windowOffset)")
DLog("Sanity check: \(Double(windowLength - oldWindowOverlap) / oldSamplingRate)")
DLog("Window overlap: OLD \(oldWindowOverlap) NEW \(newWindowOverlap)")
}
}

// make parsable
Expand Down
18 changes: 14 additions & 4 deletions SyllableDetector/ViewControllerProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ struct ProcessorEntry {
let inputChannel: Int
var network: String = ""
var config: SyllableDetectorConfig?
var resampler: Resampler?
let outputChannel: Int

init(inputChannel: Int, outputChannel: Int) {
Expand Down Expand Up @@ -104,8 +105,17 @@ class Processor: AudioInputInterfaceDelegate {
vDSP_svesq(data, 1, &sum, vDSP_Length(length))
statInput[index].writeValue(Double(sum) / Double(length))

// append audio samples
detectors[index].appendAudioData(data, withSamples: length)
// resample
if let r = entries[index].resampler {
var resampledData = r.resampleVector(data, ofLength: length)

// append audio samples
detectors[index].appendAudioData(&resampledData, withSamples: resampledData.count)
}
else {
// append audio samples
detectors[index].appendAudioData(data, withSamples: length)
}

// process
dispatch_async(queueProcessing) {
Expand Down Expand Up @@ -383,12 +393,12 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
if let url = panel.URL, let path = url.path {
do {
// load file
var config = try SyllableDetectorConfig(fromTextFile: path)
let config = try SyllableDetectorConfig(fromTextFile: path)

// check sampling rate
if (1 < abs(config.samplingRate - self.deviceInput.sampleRateInput)) {
DLog("Mismatched sampling rates. Expecting: \(config.samplingRate). Device: \(self.deviceInput.sampleRateInput).")
config.modifySamplingRate(self.deviceInput.sampleRateInput)
self.processorEntries[row].resampler = ResamplerLinear(fromRate: self.deviceInput.sampleRateInput, toRate: config.samplingRate)
}

self.processorEntries[row].config = config
Expand Down

0 comments on commit 5507db0

Please sign in to comment.