From 086b1ab503bc6dce7543424a268413262bf6d035 Mon Sep 17 00:00:00 2001 From: Nathan Perkins Date: Tue, 5 Jan 2016 15:57:37 -0500 Subject: [PATCH] Resample audio rather than adjusting parameters Previously, the code attempted to adjust window size and overlap to handle different sampling rates, but this was error-prone and imprecise. The frequency values would not line up. Instead, the new approach uses a generic resampler tool to resample audio. The current implementation only offers a very naive, but very fast, linear interpolator. --- SyllableDetector.xcodeproj/project.pbxproj | 4 + SyllableDetector/Resampler.swift | 81 +++++++++++++++++++ SyllableDetector/SyllableDetectorConfig.swift | 26 +----- .../ViewControllerProcessor.swift | 18 ++++- 4 files changed, 101 insertions(+), 28 deletions(-) create mode 100644 SyllableDetector/Resampler.swift diff --git a/SyllableDetector.xcodeproj/project.pbxproj b/SyllableDetector.xcodeproj/project.pbxproj index ae91cf1..dc54bcd 100644 --- a/SyllableDetector.xcodeproj/project.pbxproj +++ b/SyllableDetector.xcodeproj/project.pbxproj @@ -20,6 +20,7 @@ D8625C9C1BE149FE000922D8 /* CircularShortTimeFourierTransform.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */; }; D8625CA41BE14A53000922D8 /* TPCircularBuffer.c in Sources */ = {isa = PBXBuildFile; fileRef = D8625CA21BE14A53000922D8 /* TPCircularBuffer.c */; }; D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89269881BF3AEDE009482C2 /* SummaryStat.swift */; }; + D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */ = {isa = PBXBuildFile; fileRef = D89803B41C3C2ACF000CFC41 /* Resampler.swift */; }; D8B59D161BE6B77F0099CB4B /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8B59D151BE6B77F0099CB4B /* StreamReader.swift */; }; D8D1BD481BE6D61B00059974 /* ViewControllerMenu.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD471BE6D61B00059974 /* 
ViewControllerMenu.swift */; }; D8D1BD4D1BE6E53C00059974 /* WindowControllerProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */; }; @@ -43,6 +44,7 @@ D8625CA31BE14A53000922D8 /* TPCircularBuffer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TPCircularBuffer.h; sourceTree = "<group>"; }; D8625CA51BE14B8E000922D8 /* SyllableDetector-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "SyllableDetector-Bridging-Header.h"; sourceTree = "<group>"; }; D89269881BF3AEDE009482C2 /* SummaryStat.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SummaryStat.swift; sourceTree = "<group>"; }; + D89803B41C3C2ACF000CFC41 /* Resampler.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Resampler.swift; sourceTree = "<group>"; }; D8B59D151BE6B77F0099CB4B /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; }; D8D1BD471BE6D61B00059974 /* ViewControllerMenu.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewControllerMenu.swift; sourceTree = "<group>"; }; D8D1BD4C1BE6E53C00059974 /* WindowControllerProcessor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowControllerProcessor.swift; sourceTree = "<group>"; }; @@ -84,6 +86,7 @@ D8625C931BE149FE000922D8 /* CircularShortTimeFourierTransform.swift */, D8625C911BE149FE000922D8 /* Common.swift */, D8625C8E1BE149FE000922D8 /* NeuralNet.swift */, + D89803B41C3C2ACF000CFC41 /* Resampler.swift */, D8B59D151BE6B77F0099CB4B /* StreamReader.swift */, D89269881BF3AEDE009482C2 /* SummaryStat.swift */, D8625C8B1BE149FE000922D8 /* SyllableDetector.swift */, @@ -191,6 +194,7 @@ D8625C9A1BE149FE000922D8 /* Common.swift in Sources */, 
D82E7F7B1C10B629004759F7 /* ViewControllerSimulator.swift in Sources */, D8625C7F1BE14922000922D8 /* ViewControllerProcessor.swift in Sources */, + D89803B51C3C2ACF000CFC41 /* Resampler.swift in Sources */, D8625C951BE149FE000922D8 /* AudioInterface.swift in Sources */, D89269891BF3AEDE009482C2 /* SummaryStat.swift in Sources */, D8625C7D1BE14922000922D8 /* AppDelegate.swift in Sources */, diff --git a/SyllableDetector/Resampler.swift b/SyllableDetector/Resampler.swift new file mode 100644 index 0000000..2f84d88 --- /dev/null +++ b/SyllableDetector/Resampler.swift @@ -0,0 +1,81 @@ +// +// Resampler.swift +// SyllableDetector +// +// Created by Nathan Perkins on 1/5/16. +// Copyright © 2016 Gardner Lab. All rights reserved. +// + +import Foundation +import Accelerate + +protocol Resampler { + func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int) -> [Float] +// func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int +} + +// potentially use: http://www.mega-nerd.com/SRC/api_misc.html#Converters + +/// Terrible quality, very fast. 
+class ResamplerLinear: Resampler { + let samplingRateIn: Double + let samplingRateOut: Double + + private let step: Float + private var last: Float = 0.0 // used for interpolating across samples + private var offset: Float = 0.0 + + init(fromRate samplingRateIn: Double, toRate samplingRateOut: Double) { + self.samplingRateIn = samplingRateIn + self.samplingRateOut = samplingRateOut + + self.step = Float(samplingRateIn / samplingRateOut) + } + + func resampleVector(data: UnsafePointer<Float>, ofLength numSamplesIn: Int) -> [Float] { + // need to interpolate across last set of samples + let interpolateAcross = (offset < 0) + + // expected number of samples from current + let numSamplesOut = Int((Float(numSamplesIn) - offset) / step) + + // return list + var ret = [Float](count: numSamplesOut, repeatedValue: 0.0) + + // indices + let indices = UnsafeMutablePointer<Float>.alloc(numSamplesOut) + var t_offset = offset, t_step = step + defer { + indices.destroy() + indices.dealloc(numSamplesOut) + } + vDSP_vramp(&t_offset, &t_step, indices, 1, vDSP_Length(numSamplesOut)) + + if interpolateAcross { + indices[0] = 0.0 + } + + // interpolate + vDSP_vlint(data, indices, 1, &ret, 1, vDSP_Length(numSamplesOut), vDSP_Length(numSamplesIn)) + + if interpolateAcross { + ret[0] = (last * (0 - offset)) + (data[0] * (1 + offset)) + } + + offset = indices[numSamplesOut - 1] + step - Float(numSamplesIn - 1) + last = data[numSamplesIn - 1] + //print("\(indices[numSamplesOut - 1]) \(numSamplesIn) \(offset)") + + return ret + } + + func resampleArray(var arr: [Float]) -> [Float] { + // used for testing + return self.resampleVector(&arr, ofLength: arr.count) + } + +// func resampleVector(data: UnsafePointer<Float>, ofLength numSamples: Int, toOutput: UnsafeMutablePointer<Float>) -> Int { +// +// } +} + diff --git a/SyllableDetector/SyllableDetectorConfig.swift b/SyllableDetector/SyllableDetectorConfig.swift index 72f1f58..26539f8 100644 --- a/SyllableDetector/SyllableDetectorConfig.swift +++ 
b/SyllableDetector/SyllableDetectorConfig.swift @@ -29,10 +29,10 @@ struct SyllableDetectorConfig } } - var samplingRate: Double // eqv: samplerate + let samplingRate: Double // eqv: samplerate let fourierLength: Int // eqv: FFT_SIZE let windowLength: Int - var windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT)) + let windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT)) let freqRange: (Double, Double) // eqv: freq_range let timeRange: Int // eqv: time_window_steps = double(floor(time_window / timestep)) @@ -42,28 +42,6 @@ struct SyllableDetectorConfig let threshold: Double // eqv: trigger threshold let net: NeuralNet - - mutating func modifySamplingRate(newSamplingRate: Double) { - // store old things - let oldSamplingRate = samplingRate - let oldWindowOverlap = windowOverlap - - if oldSamplingRate == newSamplingRate { return } - - // window offset (difference in time between two consecutive windows) - // calculated using old values, but should be constant even after the new values - let windowOffset = Double(windowLength - oldWindowOverlap) / oldSamplingRate - - let newWindowOverlap = windowLength - Int(round(windowOffset * newSamplingRate)) - - // change the to new things - samplingRate = newSamplingRate - windowOverlap = newWindowOverlap - - DLog("Window offset: \(windowOffset)") - DLog("Sanity check: \(Double(windowLength - oldWindowOverlap) / oldSamplingRate)") - DLog("Window overlap: OLD \(oldWindowOverlap) NEW \(newWindowOverlap)") - } } // make parsable diff --git a/SyllableDetector/ViewControllerProcessor.swift b/SyllableDetector/ViewControllerProcessor.swift index cfb5c36..29d458a 100644 --- a/SyllableDetector/ViewControllerProcessor.swift +++ b/SyllableDetector/ViewControllerProcessor.swift @@ -14,6 +14,7 @@ struct ProcessorEntry { let inputChannel: Int var network: String = "" var config: SyllableDetectorConfig? + var resampler: Resampler? 
let outputChannel: Int init(inputChannel: Int, outputChannel: Int) { @@ -104,8 +105,17 @@ class Processor: AudioInputInterfaceDelegate { vDSP_svesq(data, 1, &sum, vDSP_Length(length)) statInput[index].writeValue(Double(sum) / Double(length)) - // append audio samples - detectors[index].appendAudioData(data, withSamples: length) + // resample + if let r = entries[index].resampler { + var resampledData = r.resampleVector(data, ofLength: length) + + // append audio samples + detectors[index].appendAudioData(&resampledData, withSamples: resampledData.count) + } + else { + // append audio samples + detectors[index].appendAudioData(data, withSamples: length) + } // process dispatch_async(queueProcessing) { @@ -383,12 +393,12 @@ class ViewControllerProcessor: NSViewController, NSTableViewDelegate, NSTableVie if let url = panel.URL, let path = url.path { do { // load file - var config = try SyllableDetectorConfig(fromTextFile: path) + let config = try SyllableDetectorConfig(fromTextFile: path) // check sampling rate if (1 < abs(config.samplingRate - self.deviceInput.sampleRateInput)) { DLog("Mismatched sampling rates. Expecting: \(config.samplingRate). Device: \(self.deviceInput.sampleRateInput).") - config.modifySamplingRate(self.deviceInput.sampleRateInput) + self.processorEntries[row].resampler = ResamplerLinear(fromRate: self.deviceInput.sampleRateInput, toRate: config.samplingRate) } self.processorEntries[row].config = config