
Commit

clean up recognizer a little, fix deadlock
crc-32 committed Nov 10, 2024
1 parent 74e174a commit 2490cef
Showing 4 changed files with 152 additions and 99 deletions.
@@ -0,0 +1,6 @@
package io.rebble.cobble.shared.domain.voice.speechrecognizer

data class RecognitionLanguage(
val tag: String,
val downloaded: Boolean,
)
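
The two fields correspond to RecognitionSupport's installed vs. merely-supported on-device language lists, matched by BCP-47 prefix (so an installed "en" pack matches an "en-US" locale). A hedged sketch of that mapping as a hypothetical standalone helper — the commit itself implements it as getBestRecognitionLanguage further down:

import android.os.Build.VERSION_CODES
import android.speech.RecognitionSupport
import androidx.annotation.RequiresApi

// Hypothetical helper, not part of the commit: mirrors the selection logic
// in getBestRecognitionLanguage below (API 33+).
@RequiresApi(VERSION_CODES.TIRAMISU)
fun fromSupport(support: RecognitionSupport, locale: String): RecognitionLanguage? {
    // Prefer a language pack that is already installed on the device...
    support.installedOnDeviceLanguages.firstOrNull { locale.startsWith(it) }
        ?.let { return RecognitionLanguage(tag = it, downloaded = true) }
    // ...fall back to one that is supported but would still need a download.
    support.supportedOnDeviceLanguages.firstOrNull { locale.startsWith(it) }
        ?.let { return RecognitionLanguage(tag = it, downloaded = false) }
    return null
}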
@@ -7,21 +7,14 @@ import android.speech.RecognitionSupportCallback
import android.speech.SpeechRecognizer
import androidx.annotation.RequiresApi
import androidx.compose.ui.text.intl.Locale
import io.rebble.cobble.shared.Logging
import kotlinx.coroutines.CompletableDeferred
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.asExecutor

enum class RecognitionSupportResult {
SupportedOnDevice,
SupportedOnline,
NeedsDownload,
Unsupported
}

@RequiresApi(VERSION_CODES.TIRAMISU)
suspend fun SpeechRecognizer.checkRecognitionSupport(intent: Intent): RecognitionSupportResult {
suspend fun SpeechRecognizer.checkRecognitionSupport(intent: Intent): RecognitionSupport {
val result = CompletableDeferred<RecognitionSupport>()
val language = Locale.current.toLanguageTag()
val executor = Dispatchers.IO.asExecutor()
checkRecognitionSupport(intent, executor, object : RecognitionSupportCallback {
override fun onSupportResult(recognitionSupport: RecognitionSupport) {
@@ -34,11 +27,6 @@ suspend fun SpeechRecognizer.checkRecognitionSupport(intent: Intent): Recognitio
}
})
val support = result.await()
return when {
support.supportedOnDeviceLanguages.contains(language) -> RecognitionSupportResult.SupportedOnDevice
support.installedOnDeviceLanguages.contains(language) -> RecognitionSupportResult.SupportedOnDevice
support.onlineLanguages.contains(language) -> RecognitionSupportResult.SupportedOnline
support.pendingOnDeviceLanguages.contains(language) -> RecognitionSupportResult.NeedsDownload
else -> RecognitionSupportResult.Unsupported
}
Logging.d("Locale: ${Locale.current.toLanguageTag()}, Recognition support: $support")
return support
}
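
This turns Android's callback-based SpeechRecognizer.checkRecognitionSupport into a suspend function by parking the result in a CompletableDeferred. A minimal call-site sketch (API 33+; everything outside the diff is an assumption, and the extension above is assumed to be in scope — the commit drives these calls via withContext(Dispatchers.Main)):

import android.content.Context
import android.content.Intent
import android.os.Build.VERSION_CODES
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import androidx.annotation.RequiresApi

// Hypothetical call site for the suspend wrapper above.
@RequiresApi(VERSION_CODES.TIRAMISU)
suspend fun logOnDeviceSupport(context: Context) {
    val recognizer = SpeechRecognizer.createOnDeviceSpeechRecognizer(context)
    val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
    val support = recognizer.checkRecognitionSupport(intent) // suspends until the callback fires
    println("installed=${support.installedOnDeviceLanguages} online=${support.onlineLanguages}")
    recognizer.destroy()
}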
@@ -7,6 +7,7 @@ import android.media.AudioTrack
import android.os.Build.VERSION_CODES
import android.os.Bundle
import android.os.ParcelFileDescriptor
import android.os.ParcelFileDescriptor.AutoCloseOutputStream
import android.speech.*
import androidx.annotation.RequiresApi
import androidx.compose.ui.text.intl.Locale
@@ -46,15 +47,17 @@ class SpeechRecognizerDictationService: DictationService, KoinComponent {

companion object {
private val AUDIO_LATENCY = 600.milliseconds
fun buildRecognizerIntent(audioSource: ParcelFileDescriptor? = null, encoding: Int = AudioFormat.ENCODING_PCM_16BIT, sampleRate: Int = 16000) = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
fun buildRecognizerIntent(audioSource: ParcelFileDescriptor? = null, encoding: Int = AudioFormat.ENCODING_PCM_16BIT, sampleRate: Int = 16000, language: String? = null) = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
audioSource?.let {
putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE, audioSource)
putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_ENCODING, encoding)
putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_CHANNEL_COUNT, 1)
putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_SAMPLING_RATE, sampleRate)
}
putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.current.toLanguageTag())
language?.let {
putExtra(RecognizerIntent.EXTRA_LANGUAGE, language)
}
}
}

@@ -115,34 +118,55 @@ class SpeechRecognizerDictationService: DictationService, KoinComponent {
}
}.flowOn(Dispatchers.Main)

private suspend fun SpeechRecognizer.getBestRecognitionLanguage(recognizerIntent: Intent): RecognitionLanguage? {
val support = withContext(Dispatchers.Main) {
this@getBestRecognitionLanguage.checkRecognitionSupport(recognizerIntent)
}
val locale = Locale.current.toLanguageTag()
val installedBest = support.installedOnDeviceLanguages.firstOrNull { locale.startsWith(it) }
val availableBest = support.supportedOnDeviceLanguages.firstOrNull { locale.startsWith(it) }
return when {
installedBest != null -> RecognitionLanguage(installedBest, true)
availableBest != null -> RecognitionLanguage(availableBest, false)
else -> null
}
}

private fun createRecognizerPipes(): Pair<ParcelFileDescriptor, AutoCloseOutputStream> {
val recognizerPipes = ParcelFileDescriptor.createSocketPair()
val recognizerReadPipe = recognizerPipes[0]
val recognizerWritePipe = AutoCloseOutputStream(recognizerPipes[1])
return recognizerReadPipe to recognizerWritePipe
}

override fun handleSpeechStream(speexEncoderInfo: SpeexEncoderInfo, audioStreamFrames: Flow<AudioStreamFrame>) = flow {
if (!SpeechRecognizer.isRecognitionAvailable(context)) {
if (!SpeechRecognizer.isOnDeviceRecognitionAvailable(context)) {
Logging.e("Offline speech recognition not available")
emit(DictationServiceResponse.Error(Result.FailServiceUnavailable))
return@flow
}
val decoder = SpeexCodec(speexEncoderInfo.sampleRate, speexEncoderInfo.bitRate, speexEncoderInfo.frameSize, setOf(SpeexCodec.Preprocessor.DENOISE, SpeexCodec.Preprocessor.AGC))
val decodeBufLength = Short.SIZE_BYTES * speexEncoderInfo.frameSize
val decodedBuf = ByteBuffer.allocateDirect(decodeBufLength)
decodedBuf.order(ByteOrder.nativeOrder())
val recognizerPipes = ParcelFileDescriptor.createSocketPair()
val recognizerReadPipe = recognizerPipes[0]
val recognizerWritePipe = ParcelFileDescriptor.AutoCloseOutputStream(recognizerPipes[1])
val recognizerIntent = buildRecognizerIntent(recognizerReadPipe, AudioFormat.ENCODING_PCM_16BIT, speexEncoderInfo.sampleRate.toInt())
//val recognizerIntent = buildRecognizerIntent()

val (recognizerReadPipe, recognizerWritePipe) = createRecognizerPipes()
val speechRecognizer = withContext(Dispatchers.Main) {
SpeechRecognizer.createOnDeviceSpeechRecognizer(context)
}
val supported = withContext(Dispatchers.Main) {
speechRecognizer.checkRecognitionSupport(recognizerIntent)
val recognizerIntent = buildRecognizerIntent(recognizerReadPipe, AudioFormat.ENCODING_PCM_16BIT, speexEncoderInfo.sampleRate.toInt())
val recognitionLanguage = speechRecognizer.getBestRecognitionLanguage(recognizerIntent)
if (recognitionLanguage == null) {
Logging.e("No recognition language available")
emit(DictationServiceResponse.Error(Result.FailServiceUnavailable))
return@flow
}

//TODO: handle downloads, etc
Logging.d("Recognition support: $supported")
if (supported == RecognitionSupportResult.Unsupported) {
Logging.e("Speech recognition language/type not supported")
if (!recognitionLanguage.downloaded) {
Logging.e("Recognition language not downloaded: ${recognitionLanguage.tag}")
emit(DictationServiceResponse.Error(Result.FailServiceUnavailable))
return@flow
}
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, recognitionLanguage.tag)
//audioTrack.play()

val audioJob = scope.launch {
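
Beyond the cleanup, two changes in this file look directly related to the "fix deadlock" part of the commit message: recognizer creation and the support check are now explicitly hopped onto Dispatchers.Main (SpeechRecognizer must be driven from the main looper), and the service checks isOnDeviceRecognitionAvailable before committing to on-device recognition — though that reading is an inference, not stated in the diff. The audio path still feeds the recognizer through a socket pair; an illustrative restatement of that wiring, not the commit's exact code, with constants inlined:

import android.content.Intent
import android.media.AudioFormat
import android.os.ParcelFileDescriptor
import android.os.ParcelFileDescriptor.AutoCloseOutputStream
import android.speech.RecognizerIntent

// Illustrative wiring: the recognizer reads PCM from one end of a socket
// pair while decoded Speex frames are written into the other end.
fun wireRecognizerAudio(): Pair<Intent, AutoCloseOutputStream> {
    val pipes = ParcelFileDescriptor.createSocketPair()
    val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
        putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
        putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE, pipes[0])
        putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_ENCODING, AudioFormat.ENCODING_PCM_16BIT)
        putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_CHANNEL_COUNT, 1)
        putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_SAMPLING_RATE, 16000)
    }
    val pcmSink = AutoCloseOutputStream(pipes[1]) // decoded audio goes here
    return intent to pcmSink
}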
@@ -8,10 +8,13 @@ import io.rebble.cobble.shared.domain.voice.SpeexEncoderInfo
import io.rebble.cobble.shared.domain.voice.VoiceSession
import io.rebble.libpebblecommon.packets.*
import io.rebble.libpebblecommon.util.DataBuffer
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.launch
import kotlinx.coroutines.withTimeout
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import kotlin.time.Duration.Companion.minutes

class VoiceSessionHandler(
private val pebbleDevice: PebbleDevice,
@@ -44,6 +47,103 @@ class VoiceSessionHandler(
)
}

private suspend fun handleSpeechStream(voiceSession: VoiceSession) {
val appInitiated = voiceSession.appUuid != null
var sentReady = false
voiceSession.recognizer.handleSpeechStream(voiceSession.encoderInfo, voiceSession.audioStreamFrames)
.takeWhile { it !is DictationServiceResponse.Complete }
.onEach {
Logging.v("DictationServiceResponse: $it")
withTimeout(1.minutes) {
when (it) {
is DictationServiceResponse.Ready -> {
pebbleDevice.activeVoiceSession.value = voiceSession
val result = SessionSetupResult(
sessionType = SessionType.Dictation,
result = Result.Success
)
if (appInitiated) {
result.flags.set(1u)
}
pebbleDevice.voiceService.send(result)
sentReady = true
}
is DictationServiceResponse.Error -> {
val result = if (sentReady) {
DictationResult(
voiceSession.sessionId.toUShort(),
it.result,
buildList {
if (appInitiated && voiceSession.appUuid != null) {
add(VoiceAttribute.AppUuid().apply {
uuid.set(voiceSession.appUuid)
})
}
}
)
} else {
SessionSetupResult(
sessionType = SessionType.Dictation,
result = it.result
)
}
if (appInitiated) {
result.flags.set(1u)
}
pebbleDevice.voiceService.send(result)
}
is DictationServiceResponse.Transcription -> {
val resp = DictationResult(
voiceSession.sessionId.toUShort(),
Result.Success,
buildList {
add(makeTranscription(it.sentences))
if (appInitiated && voiceSession.appUuid != null) {
add(VoiceAttribute(
id = VoiceAttributeType.AppUuid.value,
content = VoiceAttribute.AppUuid().apply {
uuid.set(voiceSession.appUuid)
}
))
}
}
)
if (appInitiated) {
resp.flags.set(1u)
}
pebbleDevice.voiceService.send(resp)
}
}
}
}
.catch {
Logging.e("Error in voice session: $it")
val result = if (sentReady) {
DictationResult(
voiceSession.sessionId.toUShort(),
Result.FailRecognizerError,
buildList {
if (appInitiated && voiceSession.appUuid != null) {
add(VoiceAttribute.AppUuid().apply {
uuid.set(voiceSession.appUuid)
})
}
}
)
} else {
SessionSetupResult(
sessionType = SessionType.Dictation,
result = Result.FailRecognizerError
)
}
if (appInitiated) {
result.flags.set(1u)
}
pebbleDevice.voiceService.send(result)
}
.collect()
}

private suspend fun listenForVoiceSessions() {
for (message in pebbleDevice.voiceService.receivedMessages) {
when (message) {
@@ -70,74 +170,9 @@ class VoiceSessionHandler(
val dictationService: DictationService by inject()
val voiceSession = VoiceSession(appUuid, message.sessionId.get().toInt(), encoderInfo, dictationService)
Logging.d("Received voice session: $voiceSession")

var sentReady = false
dictationService.handleSpeechStream(voiceSession.encoderInfo, voiceSession.audioStreamFrames)
.takeWhile { it !is DictationServiceResponse.Complete }
.onEach {
Logging.v("DictationServiceResponse: $it")
}
.collect {
when (it) {
is DictationServiceResponse.Ready -> {
pebbleDevice.activeVoiceSession.value = voiceSession
val result = SessionSetupResult(
sessionType = SessionType.Dictation,
result = Result.Success
)
if (appInitiated) {
result.flags.set(1u)
}
pebbleDevice.voiceService.send(result)
sentReady = true
}
is DictationServiceResponse.Error -> {
val result = if (sentReady) {
DictationResult(
voiceSession.sessionId.toUShort(),
it.result,
buildList {
if (appInitiated && voiceSession.appUuid != null) {
add(VoiceAttribute.AppUuid().apply {
uuid.set(voiceSession.appUuid)
})
}
}
)
} else {
SessionSetupResult(
sessionType = SessionType.Dictation,
result = it.result
)
}
if (appInitiated) {
result.flags.set(1u)
}
pebbleDevice.voiceService.send(result)
}
is DictationServiceResponse.Transcription -> {
val resp = DictationResult(
voiceSession.sessionId.toUShort(),
Result.Success,
buildList {
add(makeTranscription(it.sentences))
if (appInitiated && voiceSession.appUuid != null) {
add(VoiceAttribute(
id = VoiceAttributeType.AppUuid.value,
content = VoiceAttribute.AppUuid().apply {
uuid.set(voiceSession.appUuid)
}
))
}
}
)
if (appInitiated) {
resp.flags.set(1u)
}
pebbleDevice.voiceService.send(resp)
}
}
}
coroutineScope {
launch { handleSpeechStream(voiceSession) }
}
}
}

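
The net effect of this file's change: the response loop that previously ran inline in listenForVoiceSessions is extracted into a private handleSpeechStream, each response is now handled under a one-minute withTimeout, and a .catch stage reports Result.FailRecognizerError back to the watch instead of letting the failure escape unhandled. A minimal, self-contained sketch of that flow shape — the types here are hypothetical, but the operators match the commit:

import kotlinx.coroutines.flow.*
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withTimeout
import kotlin.time.Duration.Companion.minutes

sealed interface Response {
    data object Ready : Response
    data object Complete : Response
}

// Stop on Complete, bound each handler step to a minute so a stuck
// recognizer cannot hang the session forever, and turn upstream failures
// into an error reply rather than an unhandled exception.
suspend fun drive(responses: Flow<Response>, send: suspend (String) -> Unit) {
    responses
        .takeWhile { it !is Response.Complete }
        .onEach { withTimeout(1.minutes) { send("handled: $it") } }
        .catch { send("error: FailRecognizerError ($it)") }
        .collect()
}

fun main() = runBlocking {
    drive(flowOf(Response.Ready, Response.Complete)) { println(it) }
}

Note that the timeout bounds the handling of each individual response, not the session as a whole, so long dictations are unaffected.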
