Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Offline Speech Recognition #2089 #2242

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2d8bb93
Offline Speech Recognition #2089
VladislavAntonyuk Sep 30, 2024
67f44a3
Merge branch 'main' into 2089-offline-speech-recognition
VladislavAntonyuk Oct 1, 2024
9b7e48d
Offline Speech Recognition #2089 (#2258)
VladislavAntonyuk Oct 5, 2024
07c4ac8
Fix build
VladislavAntonyuk Oct 7, 2024
f3c1497
Merge branch 'main' into 2089-offline-speech-recognition
VladislavAntonyuk Oct 10, 2024
6c52500
Update according to comments
VladislavAntonyuk Oct 14, 2024
27bf6ae
Merge branch 'main' into 2089-offline-speech-recognition
VladislavAntonyuk Oct 14, 2024
e8a28b8
Fix tizen
VladislavAntonyuk Oct 14, 2024
02d322a
Merge branch 'main' into 2089-offline-speech-recognition
VladislavAntonyuk Oct 19, 2024
14facc0
Discard changes to samples/CommunityToolkit.Maui.Sample/CommunityTool…
VladislavAntonyuk Oct 19, 2024
4e8b436
Discard changes to global.json
VladislavAntonyuk Oct 19, 2024
285e477
Merge branch 'main' into 2089-offline-speech-recognition
VladislavAntonyuk Oct 21, 2024
eddfc71
Remove Task
VladislavAntonyuk Oct 25, 2024
79345ae
Merge remote-tracking branch 'origin/main' into 2089-offline-speech-r…
VladislavAntonyuk Oct 25, 2024
3f8b96f
Merge branch '2089-offline-speech-recognition' of https://github.com/…
VladislavAntonyuk Oct 25, 2024
67894fc
Fix tizen
VladislavAntonyuk Oct 25, 2024
b69b054
Update ISpeechToText.shared.cs
VladislavAntonyuk Oct 27, 2024
e995e8a
Update ISpeechToText.shared.cs
VladislavAntonyuk Oct 27, 2024
833c9c7
Update samples/CommunityToolkit.Maui.Sample/ViewModels/Essentials/Off…
VladislavAntonyuk Oct 27, 2024
fd3e1fb
Fix xml comment
VladislavAntonyuk Oct 27, 2024
7ddd7fe
Update sample
VladislavAntonyuk Nov 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions samples/CommunityToolkit.Maui.Sample/AppShell.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ public partial class AppShell : Shell
CreateViewModelMapping<FileSaverPage, FileSaverViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<FolderPickerPage, FolderPickerViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<SpeechToTextPage, SpeechToTextViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<OfflineSpeechToTextPage, OfflineSpeechToTextViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),

// Add Extensions View Models
CreateViewModelMapping<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel, ExtensionsGalleryPage, ExtensionsGalleryViewModel>(),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>$(NetVersion)-ios;$(NetVersion)-android;$(NetVersion)-maccatalyst</TargetFrameworks>
Expand Down
4 changes: 3 additions & 1 deletion samples/CommunityToolkit.Maui.Sample/MauiProgram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ static void RegisterViewsAndViewModels(in IServiceCollection services)
services.AddTransientWithShellRoute<FileSaverPage, FileSaverViewModel>();
services.AddTransientWithShellRoute<FolderPickerPage, FolderPickerViewModel>();
services.AddTransientWithShellRoute<SpeechToTextPage, SpeechToTextViewModel>();
services.AddTransientWithShellRoute<OfflineSpeechToTextPage, OfflineSpeechToTextViewModel>();

// Add Extensions Pages + ViewModels
services.AddTransientWithShellRoute<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel>();
Expand Down Expand Up @@ -262,7 +263,8 @@ static void RegisterEssentials(in IServiceCollection services)
services.AddSingleton<IFileSystem>(FileSystem.Current);
services.AddSingleton<IFolderPicker>(FolderPicker.Default);
services.AddSingleton<IBadge>(Badge.Default);
services.AddSingleton<ISpeechToText>(SpeechToText.Default);
services.AddKeyedSingleton<ISpeechToText, SpeechToTextImplementation>("Online");
services.AddKeyedSingleton<ISpeechToText, OfflineSpeechToTextImplementation>("Offline");
services.AddSingleton<ITextToSpeech>(TextToSpeech.Default);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Sample page for OfflineSpeechToText. Bindings are compiled against OfflineSpeechToTextViewModel (x:DataType). -->
<pages:BasePage xmlns="http://schemas.microsoft.com/dotnet/2021/maui"
xmlns:x="http://schemas.microsoft.com/winfx/2009/xaml"
xmlns:pages="clr-namespace:CommunityToolkit.Maui.Sample.Pages"
x:Class="CommunityToolkit.Maui.Sample.Pages.Essentials.OfflineSpeechToTextPage"
xmlns:vm="clr-namespace:CommunityToolkit.Maui.Sample.ViewModels.Essentials"
xmlns:essentials="clr-namespace:CommunityToolkit.Maui.Sample.Pages.Essentials"
x:TypeArguments="vm:OfflineSpeechToTextViewModel"
x:DataType="vm:OfflineSpeechToTextViewModel"
Title="OfflineSpeechToText">

<!-- Converter referenced by the locale Picker's ItemDisplayBinding below. -->
<ContentPage.Resources>
<essentials:PickerLocaleDisplayConverter x:Key="PickerLocaleDisplayConverter" />
</ContentPage.Resources>

<ScrollView>
<VerticalStackLayout
Spacing="20"
Padding="30,0">

<!-- Short description of the feature shown at the top of the page. -->
<Label
Text="OfflineSpeechToText allows the user to convert speech to text in real time in offline"
HorizontalTextAlignment="Center"/>

<Label
Text="Locale"
FontAttributes="Bold"/>

<!-- Locale selection: items come from Locales, the selection is bound to CurrentLocale. -->
<Picker
ItemsSource="{Binding Locales}"
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Text="State"
FontAttributes="Bold"/>

<!-- Displays the view model's State property. -->
<Label
Text="{Binding State}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<Label
Text="Language Output"
FontAttributes="Bold"/>

<!-- Displays the view model's RecognitionText property. -->
<Label
Text="{Binding RecognitionText}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<!-- Invokes PlayCommand on the view model. -->
<Button
Text="Play"
Command="{Binding PlayCommand}"
HorizontalOptions="Center" />

<!-- Bordered group: start/stop buttons in row 0, explanatory text spanning both columns in row 1. -->
<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>
<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<!-- Invokes StartListenCommand. -->
<Button
Grid.Row="0"
Grid.Column="0"
Text="StartListenAsync"
Command="{Binding StartListenCommand}"
HorizontalOptions="End" />

<!-- Invokes StopListenCommand. -->
<Button
Grid.Row="0"
Grid.Column="1"
Text="StopListenAsync"
Command="{Binding StopListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `StartListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Border.Content>
</Border>
</VerticalStackLayout>
</ScrollView>

</pages:BasePage>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System.Globalization;
using CommunityToolkit.Maui.Converters;
using CommunityToolkit.Maui.Sample.ViewModels.Essentials;

namespace CommunityToolkit.Maui.Sample.Pages.Essentials;

/// <summary>
/// Sample page for offline speech-to-text, backed by <see cref="OfflineSpeechToTextViewModel"/>.
/// </summary>
public partial class OfflineSpeechToTextPage : BasePage<OfflineSpeechToTextViewModel>
{
	/// <summary>
	/// Creates the page, hands the view model to the base page and loads the XAML.
	/// </summary>
	/// <param name="viewModel">View model passed through to the base page.</param>
	public OfflineSpeechToTextPage(OfflineSpeechToTextViewModel viewModel)
		: base(viewModel) => InitializeComponent();

	/// <summary>
	/// Runs the view model's <c>SetLocalesCommand</c> every time the page appears.
	/// </summary>
	/// <remarks>
	/// <c>async void</c> is required here because this overrides a <c>void</c> lifecycle method.
	/// </remarks>
	protected override async void OnAppearing()
	{
		base.OnAppearing();

		var setLocalesCommand = BindingContext.SetLocalesCommand;
		await setLocalesCommand.ExecuteAsync(null);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Text="State"
FontAttributes="Bold"/>

<Label
Text="{Binding State}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<Label
Text="Language Output"
FontAttributes="Bold"/>
Expand All @@ -47,44 +58,6 @@
Command="{Binding PlayCommand}"
HorizontalOptions="Center" />

<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>

<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<Button
Grid.Row="0"
Grid.Column="0"
Text="ListenAsync"
Command="{Binding ListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="Cancel Token"
Command="{Binding ListenCancelCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `ListenAsync` API allows you to await the final speech recognition results using async/await. `ListenAsync` is cancelled via CancellationToken."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Border.Content>
</Border>

<Border
StrokeThickness="2"
Stroke="#808080"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ public class EssentialsGalleryViewModel() : BaseGalleryViewModel(
SectionModel.Create<BadgeViewModel>("Badge", "Allows the user to set app icon badge count on the home screen"),
SectionModel.Create<FileSaverViewModel>("FileSaver", "Allows the user to save files to the filesystem"),
SectionModel.Create<FolderPickerViewModel>("FolderPicker", "Allows picking folders from the file system"),
SectionModel.Create<SpeechToTextViewModel>("SpeechToText", "Converts speech to text")
SectionModel.Create<SpeechToTextViewModel>("SpeechToText", "Converts speech to text"),
SectionModel.Create<OfflineSpeechToTextViewModel>("OfflineSpeechToText", "Converts speech to text offline")
]);
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
using System.Collections.ObjectModel;
using System.Collections.Specialized;
using System.Globalization;
using CommunityToolkit.Maui.Alerts;
using CommunityToolkit.Maui.Media;
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input;

namespace CommunityToolkit.Maui.Sample.ViewModels.Essentials;

/// <summary>
/// View model for the offline speech-to-text sample page. It exposes the available locales,
/// the current recognition state and text, and commands to play back the text and to start
/// or stop listening.
/// </summary>
public partial class OfflineSpeechToTextViewModel : BaseViewModel
{
	// Language codes tried, in this order of declaration, when choosing the initial locale.
	const string defaultLanguage = "en-US";
	const string defaultLanguageAndroid = "en";
	const string defaultLanguageTizen = "en_US";

	// Fallback text spoken by Play when RecognitionText is null; also the initial RecognitionText.
	const string welcomeText = "Welcome to .NET MAUI Community Toolkit!";

	readonly ITextToSpeech textToSpeech;
	readonly ISpeechToText speechToText;

	[ObservableProperty]
	Locale? currentLocale;

	/// <summary>
	/// Current state reported by the speech-to-text service.
	/// </summary>
	public SpeechToTextState? State => speechToText.CurrentState;

	[ObservableProperty]
	string? recognitionText = welcomeText;

	[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartListenCommand))]
	bool canStartListenExecute = true;

	[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopListenCommand))]
	bool canStopListenExecute;

	/// <summary>
	/// Creates the view model and subscribes to the speech-to-text state and completion events.
	/// </summary>
	/// <param name="textToSpeech">Service used to list locales and to speak the recognized text.</param>
	public OfflineSpeechToTextViewModel(ITextToSpeech textToSpeech)
	{
		this.textToSpeech = textToSpeech;
		this.speechToText = OfflineSpeechToText.Default;

		// NOTE(review): these subscriptions are never removed; if OfflineSpeechToText.Default
		// outlives this view model the handlers keep it reachable — confirm intended lifetime.
		Locales.CollectionChanged += HandleLocalesCollectionChanged;
		this.speechToText.StateChanged += HandleSpeechToTextStateChanged;
		this.speechToText.RecognitionResultCompleted += HandleRecognitionResultCompleted;
	}

	/// <summary>
	/// Locales offered in the picker; filled by <c>SetLocalesCommand</c>.
	/// </summary>
	public ObservableCollection<Locale> Locales { get; } = [];

	/// <summary>
	/// Reloads <see cref="Locales"/> from the text-to-speech service (sorted by language, then name)
	/// and selects a default-language locale when one exists, otherwise the first locale.
	/// </summary>
	/// <param name="token">Cancels waiting for the locale list.</param>
	[RelayCommand]
	async Task SetLocales(CancellationToken token)
	{
		Locales.Clear();

		var locales = await textToSpeech.GetLocalesAsync().WaitAsync(token);

		foreach (var locale in locales.OrderBy(x => x.Language).ThenBy(x => x.Name))
		{
			Locales.Add(locale);
		}

		CurrentLocale = Locales.FirstOrDefault(x => x.Language is defaultLanguage or defaultLanguageAndroid or defaultLanguageTizen) ?? Locales.FirstOrDefault();
	}

	/// <summary>
	/// Speaks <c>RecognitionText</c> (or the welcome text when it is null) using the current locale,
	/// waiting at most five seconds for playback to finish.
	/// </summary>
	/// <param name="cancellationToken">Cancels playback and the toasts shown on timeout.</param>
	[RelayCommand]
	async Task Play(CancellationToken cancellationToken)
	{
		// Disposed on exit so the timer behind the timeout is released.
		using var timeoutCancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(5));

		try
		{
			await textToSpeech.SpeakAsync(RecognitionText ?? welcomeText, new()
			{
				Locale = CurrentLocale,
				Pitch = 1,
				Volume = 1
			}, cancellationToken).WaitAsync(timeoutCancellationTokenSource.Token);
		}
		catch (TaskCanceledException)
		{
			await Toast.Make("Playback automatically stopped after 5 seconds").Show(cancellationToken);
#if IOS
			await Toast.Make("If you did not hear playback, test again on a physical iOS device").Show(cancellationToken);
#endif
		}
	}

	/// <summary>
	/// Requests permissions and, when granted, starts listening with partial results enabled.
	/// </summary>
	[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
	async Task StartListen()
	{
		CanStartListenExecute = false;

		var isGranted = await speechToText.RequestPermissions(CancellationToken.None);
		if (!isGranted)
		{
			// Nothing was started, so make the Start command available again.
			CanStartListenExecute = true;

			await Toast.Make("Permission not granted").Show(CancellationToken.None);
			return;
		}

		// Listening is about to begin: allow the user to stop it.
		CanStopListenExecute = true;

		const string beginSpeakingPrompt = "Begin speaking...";

		RecognitionText = beginSpeakingPrompt;

		speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

		await speechToText.StartListenAsync(new SpeechToTextOptions()
		{
			Culture = CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
			ShouldReportPartialResults = true
		}, CancellationToken.None);

		// Partial results are appended to RecognitionText, so drop the prompt if it is still there.
		if (RecognitionText is beginSpeakingPrompt)
		{
			RecognitionText = string.Empty;
		}
	}

	/// <summary>
	/// Stops listening, detaches the partial-result handler and re-enables the Start command.
	/// </summary>
	[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
	Task StopListen()
	{
		CanStartListenExecute = true;
		CanStopListenExecute = false;

		speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

		return speechToText.StopListenAsync(CancellationToken.None);
	}

	// Appends each partial result to the displayed text.
	void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
	{
		RecognitionText += e.RecognitionResult;
	}

	// Replaces the displayed text with the final result, or with the error message on failure.
	void HandleRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
	{
		RecognitionText = e.RecognitionResult.IsSuccessful ? e.RecognitionResult.Text : e.RecognitionResult.Exception.Message;
	}

	// State is a computed property, so its change notification is raised manually.
	void HandleSpeechToTextStateChanged(object? sender, SpeechToTextStateChangedEventArgs e)
	{
		OnPropertyChanged(nameof(State));
	}

	// Re-raises CurrentLocale whenever the locale list changes.
	void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
	{
		OnPropertyChanged(nameof(CurrentLocale));
	}
}
Loading