From 8ae2594fbc114a9ab77ebeb30f2220ca7c3f331f Mon Sep 17 00:00:00 2001 From: Ian Lavery Date: Thu, 3 Aug 2023 12:17:23 -0700 Subject: [PATCH] iOS vp update (#565) --- binding/ios/Rhino-iOS.podspec | 16 ++-- binding/ios/RhinoAppTest/Podfile | 6 +- binding/ios/RhinoAppTest/Podfile.lock | 16 ++-- binding/ios/RhinoManager.swift | 124 ++++++++++++++------------ demo/ios/Podfile | 4 +- demo/ios/Podfile.lock | 16 ++-- demo/ios/RhinoDemo/ContentView.swift | 46 +++++++--- 7 files changed, 133 insertions(+), 95 deletions(-) diff --git a/binding/ios/Rhino-iOS.podspec b/binding/ios/Rhino-iOS.podspec index 637e3c6f..7e50d42e 100644 --- a/binding/ios/Rhino-iOS.podspec +++ b/binding/ios/Rhino-iOS.podspec @@ -1,14 +1,14 @@ Pod::Spec.new do |s| s.name = 'Rhino-iOS' s.module_name = 'Rhino' - s.version = '2.2.1' + s.version = '2.2.2' s.license = {:type => 'Apache 2.0'} s.summary = 'iOS SDK for Picovoice\'s Rhino Speech-to-Intent engine' - s.description = + s.description = <<-DESC Rhino is Picovoice's Speech-to-Intent engine. It directly infers intent from spoken commands within a given context of interest, in real-time. For example, given a spoken command *"Can I have a small double-shot espresso?"*, Rhino infers that the user wants to order a drink and emits the following inference result: - + ```json { "type": "espresso", @@ -16,15 +16,15 @@ Pod::Spec.new do |s| "numberOfShots": "2" } ``` - - Rhino is: + + Rhino is: * using deep neural networks trained in real-world environments. * compact and computationally-efficient, making it perfect for IoT. * self-service. Developers and designers can train custom models using [Picovoice Console](https://picovoice.ai/console/). DESC s.homepage = 'https://github.com/Picovoice/rhino/tree/master/binding/ios' s.author = { 'Picovoice' => 'hello@picovoice.ai' } - s.source = { :git => "https://github.com/Picovoice/rhino.git", :tag => "Rhino-iOS-v2.2.1" } + s.source = { :git => "https://github.com/Picovoice/rhino.git", :tag => "Rhino-iOS-v2.2.2" } s.ios.deployment_target = '11.0' s.swift_version = '5.0' s.vendored_frameworks = 'lib/ios/PvRhino.xcframework' @@ -35,6 +35,6 @@ Pod::Spec.new do |s| } s.source_files = 'binding/ios/*.{swift}' s.exclude_files = 'binding/ios/RhinoAppTest/**' - - s.dependency 'ios-voice-processor', '~> 1.0.2' + + s.dependency 'ios-voice-processor', '~> 1.1.0' end diff --git a/binding/ios/RhinoAppTest/Podfile b/binding/ios/RhinoAppTest/Podfile index 0f9a0f5d..4f3da0db 100644 --- a/binding/ios/RhinoAppTest/Podfile +++ b/binding/ios/RhinoAppTest/Podfile @@ -2,13 +2,13 @@ source 'https://cdn.cocoapods.org/' platform :ios, '11.0' target 'RhinoAppTest' do - pod 'Rhino-iOS', '~> 2.2.1' + pod 'Rhino-iOS', '~> 2.2.2' end target 'RhinoAppTestUITests' do - pod 'Rhino-iOS', '~> 2.2.1' + pod 'Rhino-iOS', '~> 2.2.2' end target 'PerformanceTest' do - pod 'Rhino-iOS', '~> 2.2.1' + pod 'Rhino-iOS', '~> 2.2.2' end diff --git a/binding/ios/RhinoAppTest/Podfile.lock b/binding/ios/RhinoAppTest/Podfile.lock index 4db8415a..bdb08da0 100644 --- a/binding/ios/RhinoAppTest/Podfile.lock +++ b/binding/ios/RhinoAppTest/Podfile.lock @@ -1,10 +1,10 @@ PODS: - - ios-voice-processor (1.0.3) - - Rhino-iOS (2.2.1): - - ios-voice-processor (~> 1.0.2) + - ios-voice-processor (1.1.0) + - Rhino-iOS (2.2.2): + - ios-voice-processor (~> 1.1.0) DEPENDENCIES: - - Rhino-iOS (~> 2.2.1) + - Rhino-iOS (~> 2.2.2) SPEC REPOS: trunk: @@ -12,9 +12,9 @@ SPEC REPOS: - Rhino-iOS SPEC CHECKSUMS: - ios-voice-processor: 65b25a8db69ea25ffba0eeef37bae71a982f34cc - Rhino-iOS: c6671667cfda310b8367e7c3611f247cb256f10e + ios-voice-processor: 8e32d7f980a06d392d128ef1cd19cf6ddcaca3c1 + Rhino-iOS: 0fad86b28d35f67ccb6bd0a2efbbcc0d88b05124 -PODFILE CHECKSUM: 851b1a06103d4995d5b7532f8391f06cb5afe66b +PODFILE CHECKSUM: 05ba209bb437f842984821a9bdca751766241044 -COCOAPODS: 1.11.2 +COCOAPODS: 1.11.3 diff --git a/binding/ios/RhinoManager.swift b/binding/ios/RhinoManager.swift index bd1386ed..8372a805 100644 --- a/binding/ios/RhinoManager.swift +++ b/binding/ios/RhinoManager.swift @@ -9,21 +9,17 @@ import ios_voice_processor -public enum RhinoManagerError: Error { - case recordingDenied - case objectDisposed -} - /// High-level iOS binding for Rhino Speech-to-Intent engine. It handles recording /// audio from microphone, processes it in real-time using Rhino, and notifies the client /// when an intent is inferred from the spoken command. public class RhinoManager { - private var onInferenceCallback: ((Inference) -> Void)? - private var processErrorCallback: ((Error) -> Void)? + private var rhino: Rhino? - private var started = false - private var stop = false + private var frameListener: VoiceProcessorFrameListener? + private var errorListener: VoiceProcessorErrorListener? + + private var isListening = false public var contextInfo: String { get { @@ -53,7 +49,7 @@ public class RhinoManager { /// - onInferenceCallback: It is invoked upon completion of intent inference. /// - processErrorCallback: Invoked if an error occurs while processing frames. /// If missing, error will be printed to console. - /// - Throws: RhinoManagerError + /// - Throws: RhinoError public init( accessKey: String, contextPath: String, @@ -63,8 +59,35 @@ public class RhinoManager { requireEndpoint: Bool = true, onInferenceCallback: ((Inference) -> Void)?, processErrorCallback: ((Error) -> Void)? = nil) throws { - self.onInferenceCallback = onInferenceCallback - self.processErrorCallback = processErrorCallback + self.errorListener = VoiceProcessorErrorListener({ error in + guard let callback = processErrorCallback else { + print("\(error.errorDescription)") + return + } + callback(RhinoError(error.errorDescription)) + }) + + self.frameListener = VoiceProcessorFrameListener({ frame in + guard let rhino = self.rhino else { + return + } + + do { + let isFinalized: Bool = try rhino.process(pcm: frame) + if isFinalized { + let inference: Inference = try rhino.getInference() + onInferenceCallback?(inference) + try self.stop() + } + } catch { + guard let callback = processErrorCallback else { + print("\(error)") + return + } + callback(error) + } + }) + self.rhino = try Rhino( accessKey: accessKey, contextPath: contextPath, @@ -74,13 +97,18 @@ public class RhinoManager { } deinit { - self.delete() + if self.rhino != nil { + self.rhino!.delete() + self.rhino = nil + } } /// Stops recording and releases Rhino resources - public func delete() { - if self.started { - self.stop = true + /// + /// - Throws: RhinoError if there was an error stopping RhinoManager + public func delete() throws { + if isListening { + try stop() } if self.rhino != nil { @@ -93,61 +121,45 @@ public class RhinoManager { /// from the spoken command. Once the inference is finalized it will invoke the user /// provided callback and terminates recording audio. /// - /// - Throws: AVAudioSession, AVAudioEngine errors. Additionally RhinoManagerError if - /// microphone permission is not granted or Rhino has been disposed. + /// - Throws: RhinoError if there was an error starting RhinoManager public func process() throws { - if self.started { + guard !isListening else { return } if rhino == nil { - throw RhinoManagerError.objectDisposed + throw RhinoInvalidStateError("Rhino has been deleted.") } - // Only check if it's denied, permission will be automatically asked. - guard try VoiceProcessor.shared.hasPermissions() else { - throw RhinoManagerError.recordingDenied - } + VoiceProcessor.instance.addErrorListener(errorListener!) + VoiceProcessor.instance.addFrameListener(frameListener!) - let dispatchQueue = DispatchQueue(label: "RhinoManagerWatcher", qos: .background) - dispatchQueue.async { - while !self.stop { - usleep(10000) - } - VoiceProcessor.shared.stop() - - self.started = false - self.stop = false + do { + try VoiceProcessor.instance.start( + frameLength: Rhino.frameLength, + sampleRate: Rhino.sampleRate + ) + } catch { + throw RhinoError(error.localizedDescription) } - - try VoiceProcessor.shared.start( - frameLength: Rhino.frameLength, - sampleRate: Rhino.sampleRate, - audioCallback: self.audioCallback - ) - - self.started = true + isListening = true } - /// Callback to run after after voice processor processes frames. - private func audioCallback(pcm: [Int16]) { - guard self.rhino != nil else { + private func stop() throws { + guard isListening else { return } - do { - let isFinalized: Bool = try self.rhino!.process(pcm: pcm) - if isFinalized { - let inference: Inference = try self.rhino!.getInference() - self.onInferenceCallback?(inference) - self.stop = true - } - } catch { - if self.processErrorCallback != nil { - self.processErrorCallback!(error) - } else { - print("\(error)") + VoiceProcessor.instance.removeErrorListener(errorListener!) + VoiceProcessor.instance.removeFrameListener(frameListener!) + + if VoiceProcessor.instance.numFrameListeners == 0 { + do { + try VoiceProcessor.instance.stop() + } catch { + throw RhinoError(error.localizedDescription) } } + isListening = false } } diff --git a/demo/ios/Podfile b/demo/ios/Podfile index 25d646b1..ec985ed9 100644 --- a/demo/ios/Podfile +++ b/demo/ios/Podfile @@ -1,6 +1,6 @@ source 'https://cdn.cocoapods.org/' platform :ios, '11.0' -target 'RhinoDemo' do - pod 'Rhino-iOS', '~> 2.2.1' +target 'RhinoDemo' do + pod 'Rhino-iOS', '~> 2.2.2' end diff --git a/demo/ios/Podfile.lock b/demo/ios/Podfile.lock index bb2c67a8..ef360c98 100644 --- a/demo/ios/Podfile.lock +++ b/demo/ios/Podfile.lock @@ -1,10 +1,10 @@ PODS: - - ios-voice-processor (1.0.3) - - Rhino-iOS (2.2.1): - - ios-voice-processor (~> 1.0.2) + - ios-voice-processor (1.1.0) + - Rhino-iOS (2.2.2): + - ios-voice-processor (~> 1.1.0) DEPENDENCIES: - - Rhino-iOS (~> 2.2.1) + - Rhino-iOS (~> 2.2.2) SPEC REPOS: trunk: @@ -12,9 +12,9 @@ SPEC REPOS: - Rhino-iOS SPEC CHECKSUMS: - ios-voice-processor: 65b25a8db69ea25ffba0eeef37bae71a982f34cc - Rhino-iOS: c6671667cfda310b8367e7c3611f247cb256f10e + ios-voice-processor: 8e32d7f980a06d392d128ef1cd19cf6ddcaca3c1 + Rhino-iOS: 0fad86b28d35f67ccb6bd0a2efbbcc0d88b05124 -PODFILE CHECKSUM: cc62b80891d9f6f30ca908bf7d14844eceab0f7a +PODFILE CHECKSUM: 51a859aec88810117dab1a555c9742561c3aa12d -COCOAPODS: 1.11.2 +COCOAPODS: 1.11.3 diff --git a/demo/ios/RhinoDemo/ContentView.swift b/demo/ios/RhinoDemo/ContentView.swift index 0f8004e2..4b95856f 100644 --- a/demo/ios/RhinoDemo/ContentView.swift +++ b/demo/ios/RhinoDemo/ContentView.swift @@ -8,6 +8,8 @@ // import SwiftUI + +import ios_voice_processor import Rhino struct SheetView: View { @@ -72,6 +74,11 @@ struct ContentView: View { self.buttonLabel = "START" } + }, + processErrorCallback: { error in + DispatchQueue.main.async { + errorMessage = "\(error)" + } }) self.contextInfo = self.rhinoManager.contextInfo } catch let error as RhinoInvalidArgumentError { @@ -90,6 +97,22 @@ struct ContentView: View { } } + func startListening() { + self.result = "" + if self.rhinoManager == nil { + initRhino() + } + + do { + if self.rhinoManager != nil { + try self.rhinoManager.process() + self.buttonLabel = " ... " + } + } catch { + errorMessage = "\(error)" + } + } + var body: some View { NavigationView { VStack { @@ -110,20 +133,23 @@ struct ContentView: View { Spacer() Button { if self.buttonLabel == "START" { - self.result = "" - if self.rhinoManager == nil { - initRhino() - } + guard VoiceProcessor.hasRecordAudioPermission else { + VoiceProcessor.requestRecordAudioPermission { isGranted in + guard isGranted else { + DispatchQueue.main.async { + self.errorMessage = "Demo requires microphone permission" + } + return + } - do { - if self.rhinoManager != nil { - try self.rhinoManager.process() - self.buttonLabel = " ... " + DispatchQueue.main.async { + self.startListening() + } } - } catch { - errorMessage = "\(error)" + return } + startListening() } else { self.buttonLabel = "START" }