Skip to content

Commit

Permalink
Resample audio rather than adjusting parameters
Browse files Browse the repository at this point in the history
Previously, the code attempted to adjust the window size and overlap to handle different sampling rates,
but this was error-prone and imprecise: the frequency values would not line up. Instead, the
new approach uses a generic resampler tool to resample the audio. The current implementation
only offers a very naive, but very fast, linear interpolator.
  • Loading branch information
nathanntg committed Jan 5, 2016
1 parent b0dd4a3 commit 086b1ab
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 28 deletions.
4 changes: 4 additions & 0 deletions SyllableDetector.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
D8625C9C1BE149FE000922D8 /* CircularShortTimeFourierTransform.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */; };
D8625CA41BE14A53000922D8 /* TPCircularBuffer.c in Sources */ = {isa = PBXBuildFile; fileRef = D8625CA21BE14A53000922D8 /* TPCircularBuffer.c */; };
D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89269881BF3AEDE009482C2 /* SummaryStat.swift */; };
D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89803B41C3C2ACF000CFC41 /* Resampler.swift */; };
D8B59D161BE6B77F0099CB4B /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8B59D151BE6B77F0099CB4B /* StreamReader.swift */; };
D8D1BD481BE6D61B00059974 /* ViewControllerMenu.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD471BE6D61B00059974 /* ViewControllerMenu.swift */; };
D8D1BD4D1BE6E53C00059974 /* WindowControllerProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */; };
Expand All @@ -43,6 +44,7 @@
D8625CA31BE14A53000922D8 /* TPCircularBuffer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TPCircularBuffer.h; sourceTree = "<group>"; };
D8625CA51BE14B8E000922D8 /* SyllableDetector-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "SyllableDetector-Bridging-Header.h"; sourceTree = "<group>"; };
D89269881BF3AEDE009482C2 /* SummaryStat.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SummaryStat.swift; sourceTree = "<group>"; };
D89803B41C3C2ACF000CFC41 /* Resampler.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Resampler.swift; sourceTree = "<group>"; };
D8B59D151BE6B77F0099CB4B /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; };
D8D1BD471BE6D61B00059974 /* ViewControllerMenu.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewControllerMenu.swift; sourceTree = "<group>"; };
D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowControllerProcessor.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -84,6 +86,7 @@
D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */,
D8625C911BE149FE000922D8 /* Common.swift */,
D8625C8E1BE149FE000922D8 /* NeuralNet.swift */,
D89803B41C3C2ACF000CFC41 /* Resampler.swift */,
D8B59D151BE6B77F0099CB4B /* StreamReader.swift */,
D89269881BF3AEDE009482C2 /* SummaryStat.swift */,
D8625C8B1BE149FE000922D8 /* SyllableDetector.swift */,
Expand Down Expand Up @@ -191,6 +194,7 @@
D8625C9A1BE149FE000922D8 /* Common.swift in Sources */,
D82E7F7B1C10B629004759F7 /* ViewControllerSimulator.swift in Sources */,
D8625C7F1BE14922000922D8 /* ViewControllerProcessor.swift in Sources */,
D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */,
D8625C951BE149FE000922D8 /* AudioInterface.swift in Sources */,
D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */,
D8625C7D1BE14922000922D8 /* AppDelegate.swift in Sources */,
Expand Down
81 changes: 81 additions & 0 deletions SyllableDetector/Resampler.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//
// Resampler.swift
// SyllableDetector
//
// Created by Nathan Perkins on 1/5/16.
// Copyright © 2016 Gardner Lab. All rights reserved.
//

import Foundation
import Accelerate

/// Interface for objects that convert a buffer of audio samples to a different sampling rate.
///
/// The only conforming type in this file is `ResamplerLinear`, which keeps state between
/// calls so that consecutive buffers are treated as one continuous stream.
protocol Resampler {
/// Resamples the `numSamples` values starting at `data` and returns the converted samples
/// as a new array. The number of returned samples is decided by the conforming type.
func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int) -> [Float]
// Unimplemented alternative signature, kept for reference (not part of the protocol):
// func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int
}

// potentially use: http://www.mega-nerd.com/SRC/api_misc.html#Converters

/// Terrible quality, very fast.
///
/// Streaming linear-interpolation resampler. Consecutive calls to `resampleVector` are treated
/// as consecutive chunks of one continuous signal: the position of the next output sample and
/// the final sample of the previous chunk are carried over, so output samples stay evenly
/// spaced (every `step` input samples) across chunk boundaries.
///
/// No low-pass filtering is performed, so downsampling will alias.
class ResamplerLinear: Resampler {
    let samplingRateIn: Double
    let samplingRateOut: Double

    /// Distance between two consecutive output samples, measured in input samples.
    private let step: Float

    /// Final sample of the previous buffer; used for interpolating across buffers.
    private var last: Float = 0.0

    /// Position of the next output sample relative to the first sample of the next buffer.
    /// Always greater than -1: a negative value lies between `last` (position -1) and the
    /// first sample of the next buffer (position 0).
    private var offset: Float = 0.0

    /// Creates a resampler converting audio from `samplingRateIn` to `samplingRateOut`.
    /// Both rates must be positive.
    init(fromRate samplingRateIn: Double, toRate samplingRateOut: Double) {
        // a zero or negative rate would produce a zero, negative or infinite step
        precondition(samplingRateIn > 0.0 && samplingRateOut > 0.0, "Sampling rates must be positive.")

        self.samplingRateIn = samplingRateIn
        self.samplingRateOut = samplingRateOut

        self.step = Float(samplingRateIn / samplingRateOut)
    }

    /// Resamples the next chunk of the stream.
    ///
    /// - Parameter data: Pointer to `numSamplesIn` readable input samples.
    /// - Parameter numSamplesIn: Number of input samples; zero or less yields an empty result
    ///   and leaves the stream state untouched.
    /// - Returns: The output samples whose positions fall between the end of the previous
    ///   chunk and the final sample of this chunk. May be empty when downsampling heavily
    ///   with short chunks.
    func resampleVector(data: UnsafePointer<Float>, ofLength numSamplesIn: Int) -> [Float] {
        // an empty buffer neither produces output nor advances the stream
        if numSamplesIn <= 0 {
            return []
        }

        let lastIndex = Float(numSamplesIn - 1)
        let finalSample = data[numSamplesIn - 1]

        // number of output positions offset, offset + step, ... that are <= lastIndex
        var numSamplesOut = 0
        if offset <= lastIndex {
            numSamplesOut = Int((lastIndex - offset) / step) + 1
        }

        // next output lies beyond this buffer: just advance the stream
        if numSamplesOut == 0 {
            offset -= Float(numSamplesIn)
            last = finalSample
            return []
        }

        // Frame the input with the previous buffer's final sample in front (so positions in
        // (-1, 0) can be interpolated) and a copy of its own final sample behind (vDSP_vlint
        // always reads the element after the integer part of each index).
        var padded = [Float]()
        padded.reserveCapacity(numSamplesIn + 2)
        padded.append(last)
        padded += UnsafeBufferPointer(start: data, count: numSamplesIn)
        padded.append(finalSample)

        // indices into the padded buffer (shifted by one because of the leading sample);
        // clamped at zero to guard against rounding pushing the start marginally negative
        var indices = [Float](count: numSamplesOut, repeatedValue: 0.0)
        var rampStart = max(offset + 1.0, 0.0)
        var rampStep = step
        vDSP_vramp(&rampStart, &rampStep, &indices, 1, vDSP_Length(numSamplesOut))

        // interpolate
        var ret = [Float](count: numSamplesOut, repeatedValue: 0.0)
        vDSP_vlint(padded, indices, 1, &ret, 1, vDSP_Length(numSamplesOut), vDSP_Length(padded.count))

        // position of the next output sample, relative to the start of the next buffer
        offset += Float(numSamplesOut) * step - Float(numSamplesIn)
        last = finalSample

        return ret
    }

    /// Convenience wrapper around `resampleVector` taking an array; used for testing.
    func resampleArray(var arr: [Float]) -> [Float] {
        return self.resampleVector(&arr, ofLength: arr.count)
    }

    // Unimplemented variant writing into a caller-supplied buffer, kept for reference:
    // func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int {
    //
    // }
}

26 changes: 2 additions & 24 deletions SyllableDetector/SyllableDetectorConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ struct SyllableDetectorConfig
}
}

var samplingRate: Double // eqv: samplerate
let samplingRate: Double // eqv: samplerate
let fourierLength: Int // eqv: FFT_SIZE
let windowLength: Int
var windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT))
let windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT))

let freqRange: (Double, Double) // eqv: freq_range
let timeRange: Int // eqv: time_window_steps = double(floor(time_window / timestep))
Expand All @@ -42,28 +42,6 @@ struct SyllableDetectorConfig
let threshold: Double // eqv: trigger threshold

let net: NeuralNet

/// Switches the configuration to `newSamplingRate`, recomputing the window overlap so that
/// the time between two consecutive windows stays as close as possible to its old value.
mutating func modifySamplingRate(newSamplingRate: Double) {
    // snapshot of the values about to be replaced
    let previousRate = samplingRate
    let previousOverlap = windowOverlap

    // nothing to adjust when the rate is unchanged
    guard previousRate != newSamplingRate else { return }

    // time between the starts of two consecutive windows; derived from the old values and
    // meant to remain constant under the new ones
    let hopSeconds = Double(windowLength - previousOverlap) / previousRate

    // the same hop expressed in samples at the new rate, rounded to a whole sample
    let hopSamples = Int(round(hopSeconds * newSamplingRate))
    let updatedOverlap = windowLength - hopSamples

    // commit the new values
    samplingRate = newSamplingRate
    windowOverlap = updatedOverlap

    DLog("Window offset: \(hopSeconds)")
    DLog("Sanity check: \(Double(windowLength - previousOverlap) / previousRate)")
    DLog("Window overlap: OLD \(previousOverlap) NEW \(updatedOverlap)")
}
}

// make parsable
Expand Down
18 changes: 14 additions & 4 deletions SyllableDetector/ViewControllerProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ struct ProcessorEntry {
let inputChannel: Int
var network: String = ""
var config: SyllableDetectorConfig?
var resampler: Resampler?
let outputChannel: Int

init(inputChannel: Int, outputChannel: Int) {
Expand Down Expand Up @@ -104,8 +105,17 @@ class Processor: AudioInputInterfaceDelegate {
vDSP_svesq(data, 1, &sum, vDSP_Length(length))
statInput[index].writeValue(Double(sum) / Double(length))

// append audio samples
detectors[index].appendAudioData(data, withSamples: length)
// resample
if let r = entries[index].resampler {
var resampledData = r.resampleVector(data, ofLength: length)

// append audio samples
detectors[index].appendAudioData(&resampledData, withSamples: resampledData.count)
}
else {
// append audio samples
detectors[index].appendAudioData(data, withSamples: length)
}

// process
dispatch_async(queueProcessing) {
Expand Down Expand Up @@ -383,12 +393,12 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie
if let url = panel.URL, let path = url.path {
do {
// load file
var config = try SyllableDetectorConfig(fromTextFile: path)
let config = try SyllableDetectorConfig(fromTextFile: path)

// check sampling rate
if (1 < abs(config.samplingRate - self.deviceInput.sampleRateInput)) {
DLog("Mismatched sampling rates. Expecting: \(config.samplingRate). Device: \(self.deviceInput.sampleRateInput).")
config.modifySamplingRate(self.deviceInput.sampleRateInput)
self.processorEntries[row].resampler = ResamplerLinear(fromRate: self.deviceInput.sampleRateInput, toRate: config.samplingRate)
}

self.processorEntries[row].config = config
Expand Down

0 comments on commit 086b1ab

Please sign in to comment.