Skip to content

Commit

Permalink
add more examples
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Aug 12, 2024
1 parent 24ebcea commit ae0b2e0
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/pascal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,17 @@ jobs:
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-paraformer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-zipformer-ctc.sh
echo "---"
./run-zipformer-ctc-hlg.sh
rm -rf sherpa-onnx-*
echo "---"
ls -lh
popd
2 changes: 2 additions & 0 deletions pascal-api-examples/streaming-asr/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
zipformer_transducer
paraformer
zipformer_ctc
zipformer_ctc_hlg
11 changes: 11 additions & 0 deletions pascal-api-examples/streaming-asr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Introduction

This folder contains examples of using sherpa-onnx's Object Pascal
APIs with streaming models for speech recognition.

|File|Description|
|----|-----------|
|./run-paraformer.sh|Use a streaming Paraformer model for speech recognition|
|./run-zipformer-ctc-hlg.sh|Use a streaming Zipformer CTC model with HLG for speech recognition|
|./run-zipformer-ctc.sh|Use a streaming Zipformer CTC model for speech recognition|
|./run-zipformer-transducer.sh|Use a streaming Zipformer transducer model for speech recognition|
89 changes: 89 additions & 0 deletions pascal-api-examples/streaming-asr/zipformer_ctc.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{ Copyright (c) 2024 Xiaomi Corporation }

{
This file shows how to use a streaming Zipformer CTC model
to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_ctc;

{$mode delphi}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;


var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;        // wall-clock decoding time, in seconds
  Duration: Single;       // length of the input wave, in seconds
  RealTimeFactor: Single; // Elapsed / Duration
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  try
    {The timer starts after the recognizer has been constructed, so
     recognizer construction is not part of the measured time.}
    Start := Now;

    Stream := Recognizer.CreateStream();
    try
      {Feed the whole file at once.}
      Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

      {Append tail padding after the real audio.}
      SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
      Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

      {Tell the stream that no more samples will arrive.}
      Stream.InputFinished();

      {Decode until the recognizer reports the stream is no longer ready.}
      while Recognizer.IsReady(Stream) do
        Recognizer.Decode(Stream);

      RecognitionResult := Recognizer.GetResult(Stream);

      Stop := Now;

      Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
      {Duration counts only the real audio, not the tail padding.}
      Duration := Length(Wave.Samples) / Wave.SampleRate;
      RealTimeFactor := Elapsed / Duration;

      WriteLn(RecognitionResult.ToString);
      WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
      WriteLn(Format('Elapsed %.3f s', [Elapsed]));
      WriteLn(Format('Wave duration %.3f s', [Duration]));
      WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
    finally
      {Free resources to avoid memory leak.
       The finally blocks make sure the stream and the recognizer are
       released even if an exception is raised while decoding or printing.
       The stream is freed before the recognizer that created it.}
      FreeAndNil(Stream);
    end;
  finally
    FreeAndNil(Recognizer);
  end;
end.
90 changes: 90 additions & 0 deletions pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
{ Copyright (c) 2024 Xiaomi Corporation }

{
This file shows how to use a streaming Zipformer CTC model
with HLG to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_ctc_hlg;

{$mode delphi}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;


var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;        // wall-clock decoding time, in seconds
  Duration: Single;       // length of the input wave, in seconds
  RealTimeFactor: Single; // Elapsed / Duration
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := True;
  {Setting the graph is the only decoding-related difference from the
   plain CTC example: the HLG file is passed to the CTC FST decoder config.}
  Config.CtcFstDecoderConfig.Graph := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst';

  WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  try
    {The timer starts after the recognizer has been constructed, so
     recognizer construction is not part of the measured time.}
    Start := Now;

    Stream := Recognizer.CreateStream();
    try
      {Feed the whole file at once.}
      Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

      {Append tail padding after the real audio.}
      SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
      Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

      {Tell the stream that no more samples will arrive.}
      Stream.InputFinished();

      {Decode until the recognizer reports the stream is no longer ready.}
      while Recognizer.IsReady(Stream) do
        Recognizer.Decode(Stream);

      RecognitionResult := Recognizer.GetResult(Stream);

      Stop := Now;

      Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
      {Duration counts only the real audio, not the tail padding.}
      Duration := Length(Wave.Samples) / Wave.SampleRate;
      RealTimeFactor := Elapsed / Duration;

      WriteLn(RecognitionResult.ToString);
      WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
      WriteLn(Format('Elapsed %.3f s', [Elapsed]));
      WriteLn(Format('Wave duration %.3f s', [Duration]));
      WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
    finally
      {Free resources to avoid memory leak.
       The finally blocks make sure the stream and the recognizer are
       released even if an exception is raised while decoding or printing.
       The stream is freed before the recognizer that created it.}
      FreeAndNil(Stream);
    end;
  finally
    FreeAndNil(Recognizer);
  end;
end.

0 comments on commit ae0b2e0

Please sign in to comment.