diff --git a/SyllableDetector/SyllableDetector.swift b/SyllableDetector/SyllableDetector.swift index 98faa7d..5b4a407 100644 --- a/SyllableDetector/SyllableDetector.swift +++ b/SyllableDetector/SyllableDetector.swift @@ -39,7 +39,7 @@ class SyllableDetector: NSObject, AVCaptureAudioDataOutputSampleBufferDelegate self.config = config // initialize the FFT - shortTimeFourierTransform = CircularShortTimeFourierTransform(windowLength: config.windowLength, withOverlap: config.fourierOverlap, fftSizeOf: config.fourierLength) + shortTimeFourierTransform = CircularShortTimeFourierTransform(windowLength: config.windowLength, withOverlap: config.windowOverlap, fftSizeOf: config.fourierLength) shortTimeFourierTransform.windowType = WindowType.Hamming // store frequency indices diff --git a/SyllableDetector/SyllableDetectorConfig.swift b/SyllableDetector/SyllableDetectorConfig.swift index fe41419..72f1f58 100644 --- a/SyllableDetector/SyllableDetectorConfig.swift +++ b/SyllableDetector/SyllableDetectorConfig.swift @@ -30,9 +30,9 @@ struct SyllableDetectorConfig } var samplingRate: Double // eqv: samplerate - var windowLength: Int - var fourierLength: Int // eqv: FFT_SIZE - var fourierOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT)) + let fourierLength: Int // eqv: FFT_SIZE + let windowLength: Int + var windowOverlap: Int // eqv: NOVERLAP = FFT_SIZE - (floor(samplerate * FFT_TIME_SHIFT)) let freqRange: (Double, Double) // eqv: freq_range let timeRange: Int // eqv: time_window_steps = double(floor(time_window / timestep)) @@ -46,28 +46,23 @@ struct SyllableDetectorConfig mutating func modifySamplingRate(newSamplingRate: Double) { // store old things let oldSamplingRate = samplingRate - let oldFourierLength = fourierLength - let oldFourierOverlap = fourierOverlap + let oldWindowOverlap = windowOverlap if oldSamplingRate == newSamplingRate { return } + // window offset (difference in time between two consecutive windows) + // calculated using old values, but should be constant even after the new values + let windowOffset = Double(windowLength - oldWindowOverlap) / oldSamplingRate - // get new approximate fourier length - let newApproximateFourierLength = newSamplingRate * Double(oldFourierLength) / oldSamplingRate - - // convert to closest power of 2 - let newFourierLength = 1 << Int(round(log2(newApproximateFourierLength))) - - // get new fourier overlap - let newFourierOverlap = newFourierLength - Int(round(newSamplingRate * Double(oldFourierLength - oldFourierOverlap) / oldSamplingRate)) + let newWindowOverlap = windowLength - Int(round(windowOffset * newSamplingRate)) // change the to new things samplingRate = newSamplingRate - fourierLength = newFourierLength - fourierOverlap = newFourierOverlap + windowOverlap = newWindowOverlap - DLog("New fourier length: \(newFourierLength)") - DLog("New fourier overlap: \(newFourierOverlap)") + DLog("Window offset: \(windowOffset)") + DLog("Sanity check: \(Double(windowLength - oldWindowOverlap) / oldSamplingRate)") + DLog("Window overlap: OLD \(oldWindowOverlap) NEW \(newWindowOverlap)") } } @@ -234,7 +229,7 @@ extension SyllableDetectorConfig } // fourier length: int - fourierOverlap = try SyllableDetectorConfig.parseInt("fourierOverlap", from: data) + windowOverlap = try SyllableDetectorConfig.parseInt("windowOverlap", from: data) // frequency range: double, double let potentialFreqRange = try SyllableDetectorConfig.parseDoubleArray("freqRange", withCount: 2, from: data) diff --git a/SyllableDetector/ViewControllerSimulator.swift b/SyllableDetector/ViewControllerSimulator.swift index 3d5b040..4260372 100644 --- a/SyllableDetector/ViewControllerSimulator.swift +++ b/SyllableDetector/ViewControllerSimulator.swift @@ -242,7 +242,10 @@ class ViewControllerSimulator: NSViewController { assert(status == noErr) // processing - var nextCount: Int = sd.config.fourierLength + ((sd.config.fourierLength - sd.config.fourierOverlap) * (sd.config.timeRange - 1)), nextValue: Float = 0.0 + var nextCount: Int = sd.config.windowLength + ((sd.config.windowLength - sd.config.windowOverlap) * (sd.config.timeRange - 1)), nextValue: Float = 0.0 + if sd.config.windowOverlap < 0 { + nextCount = nextCount - sd.config.windowOverlap // since gap is applied even to the first data set + } var samplePosition: Int64 = 0 let gcdGroup = dispatch_group_create() let gcdQueue = dispatch_queue_create("Encode", DISPATCH_QUEUE_SERIAL) @@ -296,7 +299,7 @@ class ViewControllerSimulator: NSViewController { } // length - var l = sd.config.fourierLength - sd.config.fourierOverlap + var l = sd.config.windowLength - sd.config.windowOverlap for ; 0 < l && i < numSamples; ++i, --l { newSamples[i] = v @@ -325,6 +328,8 @@ class ViewControllerSimulator: NSViewController { // append sample buffer if !avWriterInput.appendSampleBuffer(newSampleBuffer!) { + DLog("failed to write sample buffer \(avWriter.status) \(avWriter.error)") + avReader.cancelReading() // cancel reading completedOrFailed = true } } diff --git a/convert_to_text.m b/convert_to_text.m index 3e1a9c8..74d74d7 100644 --- a/convert_to_text.m +++ b/convert_to_text.m @@ -29,9 +29,9 @@ function convert_to_text(fn, mat) fprintf(fh, '# AUTOMATICALLY GENERATED SYLLABLE DETECTOR CONFIGURATION\n'); fprintf(fh, 'samplingRate = %.1f\n', f.samplerate); -fprintf(fh, 'windowLength = %d\n', f.win_size); fprintf(fh, 'fourierLength = %d\n', f.fft_size); -fprintf(fh, 'fourierOverlap = %d\n', f.fft_size - f.fft_time_shift); +fprintf(fh, 'windowLength = %d\n', f.win_size); +fprintf(fh, 'windowOverlap = %d\n', f.fft_size - f.fft_time_shift); fprintf(fh, 'freqRange = %.1f, %.1f\n', f.freq_range(1), f.freq_range(end)); fprintf(fh, 'timeRange = %d\n', f.time_window_steps);