Skip to content

Commit

Permalink
Add C# API for spoken language identification (#697)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Mar 25, 2024
1 parent 83a10a5 commit 305c373
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 55 deletions.
30 changes: 30 additions & 0 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Runs every .NET example script and collects the generated TTS wave files.
#
# Must be started from the repository root: all paths below are relative to it.

# Abort on the first failing example (and echo each command) so that a broken
# example fails the CI job instead of being silently ignored.
set -ex

cd dotnet-examples/

cd spoken-language-identification
./run.sh

cd ../online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh

# Back to the repository root.
cd ../..

# -p: do not fail if the directory is already there (e.g. on a re-run).
mkdir -p tts

# Gather the wave files produced by the offline-tts examples in ./tts.
cp dotnet-examples/offline-tts/*.wav ./tts
27 changes: 2 additions & 25 deletions .github/workflows/test-dot-net-nuget.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,33 +40,10 @@ jobs:
- name: Check dotnet
run: dotnet --info

- name: Decode a file
- name: Run tests
shell: bash
run: |
cd dotnet-examples/
cd online-decode-files
./run-transducer.sh
./run-paraformer.sh
cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh
cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh
cd ../..
mkdir tts
cp dotnet-examples/offline-tts/*.wav ./tts
.github/scripts/test-dot-net.sh
- uses: actions/upload-artifact@v4
with:
Expand Down
31 changes: 4 additions & 27 deletions .github/workflows/test-dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -177,39 +177,16 @@ jobs:
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
ls -lh /tmp
- name: Decode a file
- name: Run tests
shell: bash
run: |
cd dotnet-examples/
.github/scripts/test-dot-net.sh
cd online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh
cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh
cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh
cd ../..
mkdir tts
cp dotnet-examples/offline-tts/*.wav ./tts
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: dot-net-tts-generated-test-files-${{ matrix.os }}
path: tts
6 changes: 6 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -42,5 +44,9 @@ Global
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
42 changes: 42 additions & 0 deletions dotnet-examples/spoken-language-identification/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do spoken language identification with whisper.
//
// 1. Download a whisper multilingual model. We use a tiny model below.
// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// to download more models.
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;
using System.Collections.Generic;
using System;

/// <summary>
/// Console demo: identifies the language spoken in a wave file with a
/// whisper multilingual model.
/// </summary>
/// <remarks>
/// The referenced org.k2fsa.sherpa.onnx package must contain the
/// SpokenLanguageIdentification* types. Building against a package without
/// them fails with "The type or namespace name
/// 'SpokenLanguageIdentificationConfig' could not be found".
/// </remarks>
class SpokenLanguageIdentificationDemo
{
    static void Main(string[] args)
    {
        var config = new SpokenLanguageIdentificationConfig();
        config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
        config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

        // SpokenLanguageIdentification wraps a native handle and implements
        // IDisposable, so release it deterministically when Main returns.
        using var slid = new SpokenLanguageIdentification(config);

        var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
        WaveReader waveReader = new WaveReader(filename);

        // Feed the whole file into a single stream, then run identification.
        var s = slid.CreateStream();
        s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
        var result = slid.Compute(s);

        Console.WriteLine($"Filename: {filename}");
        Console.WriteLine($"Detected language: {result.Lang}");
    }
}

12 changes: 12 additions & 0 deletions dotnet-examples/spoken-language-identification/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Downloads the whisper tiny model on first use, then runs this example.

set -ex

# Nothing to fetch when the model directory is already present.
[ -d ./sherpa-onnx-whisper-tiny ] || {
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
}

dotnet run

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting net6.0. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- Version="*" is a floating version: no specific package version is pinned. -->
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
19 changes: 19 additions & 0 deletions scripts/dotnet/examples/spoken-language-identification.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting net6.0. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!--
  Adds /tmp/packages as a package source next to any inherited sources and
  nuget.org.
  NOTE(review): presumably the CI workflow puts the locally built
  org.k2fsa.sherpa.onnx package into /tmp/packages; confirm against the workflow.
-->
<PropertyGroup>
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>

<!-- Version="*" is a floating version: no specific package version is pinned. -->
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
137 changes: 134 additions & 3 deletions scripts/dotnet/offline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,8 @@ public OfflineRecognizerResult(IntPtr handle)
while (*buffer != 0)
{
++buffer;
length += 1;
}
length = (int)(buffer - (byte*)impl.Text);
}

byte[] stringBuffer = new byte[length];
Expand Down Expand Up @@ -496,8 +496,6 @@ public OfflineStream CreateStream()
return new OfflineStream(p);
}

/// You have to ensure that IsReady(stream) returns true before
/// you call this method
public void Decode(OfflineStream stream)
{
Decode(_handle.Handle, stream.Handle);
Expand Down Expand Up @@ -549,4 +547,137 @@ private void Cleanup()
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
}

/// <summary>
/// Whisper model settings for spoken language identification.
/// </summary>
/// <remarks>
/// The struct is marshaled to native code, so the declaration order of the
/// fields must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationWhisperConfig
{
    /// <summary>Whisper encoder model file. Empty by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)] public string Encoder;

    /// <summary>Whisper decoder model file. Empty by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)] public string Decoder;

    /// <summary>
    /// Defaults to -1.
    /// NOTE(review): presumably -1 lets the native library pick the amount
    /// of tail padding; confirm against the C API.
    /// </summary>
    public int TailPaddings;

    /// <summary>Creates a configuration holding the default values.</summary>
    public SpokenLanguageIdentificationWhisperConfig()
    {
        TailPaddings = -1;
        Decoder = string.Empty;
        Encoder = string.Empty;
    }
}

/// <summary>
/// Top-level configuration handed to the native library when a
/// <see cref="SpokenLanguageIdentification"/> is created.
/// </summary>
/// <remarks>
/// The struct is passed by reference to native code, so the layout is
/// declared explicitly (as on SpokenLanguageIdentificationWhisperConfig) and
/// the declaration order of the fields must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationConfig
{
    /// <summary>
    /// Creates a configuration with default whisper settings, one thread,
    /// debug disabled and the "cpu" provider.
    /// </summary>
    public SpokenLanguageIdentificationConfig()
    {
        Whisper = new SpokenLanguageIdentificationWhisperConfig();
        NumThreads = 1;
        Debug = 0;
        Provider = "cpu";
    }

    /// <summary>Whisper model settings.</summary>
    public SpokenLanguageIdentificationWhisperConfig Whisper;

    /// <summary>Defaults to 1.</summary>
    public int NumThreads;

    /// <summary>
    /// Defaults to 0.
    /// NOTE(review): presumably a non-zero value enables debug output in the
    /// native library; confirm against the C API.
    /// </summary>
    public int Debug;

    /// <summary>Defaults to "cpu".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;
}

/// <summary>
/// Managed copy of a native spoken-language-identification result.
/// </summary>
public class SpokenLanguageIdentificationResult
{
    /// <summary>
    /// Copies the language string out of the native result.
    /// </summary>
    /// <param name="handle">
    /// Pointer to the native result struct, whose first member is a pointer
    /// to a zero-terminated UTF-8 string. The string is copied, so the native
    /// result may be destroyed once the constructor has returned.
    /// </param>
    public SpokenLanguageIdentificationResult(IntPtr handle)
    {
        Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

        // A result without a language string maps to an empty string
        // instead of dereferencing a null pointer.
        if (impl.Lang == IntPtr.Zero)
        {
            _lang = "";
            return;
        }

        // PtrToStringUTF8() requires .net standard 2.1, so locate the
        // terminating zero byte and decode the bytes manually.
        // _lang = Marshal.PtrToStringUTF8(impl.Lang);
        int length = 0;
        while (Marshal.ReadByte(impl.Lang, length) != 0)
        {
            ++length;
        }

        byte[] stringBuffer = new byte[length];
        Marshal.Copy(impl.Lang, stringBuffer, 0, length);
        _lang = Encoding.UTF8.GetString(stringBuffer);
    }

    /// <summary>Managed view of the native result struct.</summary>
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Lang;
    }

    private readonly String _lang;

    /// <summary>The language string copied from the native result.</summary>
    public String Lang => _lang;
}

/// <summary>
/// Managed wrapper around the native spoken-language-identification object.
/// </summary>
/// <remarks>
/// Owns a native handle; call <see cref="Dispose"/> (or use a using
/// statement) to release it deterministically. The finalizer releases it
/// otherwise.
/// </remarks>
public class SpokenLanguageIdentification : IDisposable
{
    /// <summary>Creates the native object from <paramref name="config"/>.</summary>
    public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
    {
        IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Creates a new stream to feed audio samples into.</summary>
    public OfflineStream CreateStream()
    {
        IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
        return new OfflineStream(p);
    }

    /// <summary>
    /// Runs language identification on <paramref name="stream"/>.
    /// </summary>
    /// <returns>A managed copy of the native result.</returns>
    public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
    {
        IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
        try
        {
            return new SpokenLanguageIdentificationResult(h);
        }
        finally
        {
            // Free the native result even when copying it out throws.
            if (h != IntPtr.Zero)
            {
                SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
            }
        }
    }

    /// <summary>Releases the native object. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpokenLanguageIdentification()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Already released (or never created): nothing to hand to native code.
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
}
}

0 comments on commit 305c373

Please sign in to comment.