Skip to content

Commit

Permalink
Add Pascal API for VAD
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Aug 13, 2024
1 parent f8d2bf7 commit df322a1
Show file tree
Hide file tree
Showing 8 changed files with 360 additions and 9 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/pascal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,32 @@ jobs:
cp -v install/lib/*.dll ../pascal-api-examples/read-wav
cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr
cp -v install/lib/*.dll ../pascal-api-examples/vad
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/vad
fi
# CI step: from pascal-api-examples/vad, run the two VAD example scripts
# (run-circular-buffer.sh, then run-remove-silence.sh) and list the directory
# contents afterwards. The Windows x86_64 FPC 3.2.2 bin directory is prepended
# to PATH before the scripts run.
- name: Run Pascal test (VAD test)
shell: bash
run: |
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
pushd vad
./run-circular-buffer.sh
echo "---"
./run-remove-silence.sh
echo "---"
ls -lh
popd
- name: Run Pascal test (Read wav test)
shell: bash
run: |
Expand Down
2 changes: 2 additions & 0 deletions pascal-api-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|[read-wav](./read-wav)|It shows how to read a wave file.|
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
|[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.|
|[vad](./vad)| It shows how to use the voice activity detection API.|
|[vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| It shows how to use the voice activity detection API with non-streaming models for speech recognition.|
1 change: 1 addition & 0 deletions pascal-api-examples/vad/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
!run*.sh
circular_buffer
remove_silence
11 changes: 11 additions & 0 deletions pascal-api-examples/vad/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Introduction


This directory contains examples showing how to use the VAD (voice activity detection)
APIs.

|Script| Description|
|---------|------------|
|[run-circular-buffer.sh](./run-circular-buffer.sh)|It shows how to use the circular buffer API.|
|[run-remove-silence.sh](./run-remove-silence.sh)|It shows how to use the VAD API to remove silences from a wave file.|

3 changes: 3 additions & 0 deletions pascal-api-examples/vad/circular_buffer.pas
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{ Copyright (c) 2024 Xiaomi Corporation }
program circular_buffer;
{
This file shows how to use the CircularBuffer API of sherpa-onnx
}

{$mode objfpc}
{$ASSERTIONS ON}
Expand Down
100 changes: 100 additions & 0 deletions pascal-api-examples/vad/remove_silence.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{ Copyright (c) 2024 Xiaomi Corporation }
{
This file shows how to use the VAD API from sherpa-onnx
to remove silences from a wave file.

It reads ./lei-jun-test.wav, feeds it to the voice activity detector in
windows of WindowSize samples, collects every speech segment the detector
reports, concatenates the samples of those segments and writes the result
to ./lei-jun-test-no-silence.wav.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;

  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;
  SampleRate: Integer;

  AllSpeechSegment: array of TSherpaOnnxSpeechSegment;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;
begin

  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := 512; {Please don't change it unless you know the details}
  Initialize(Config);

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.25;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  { NOTE(review): the second argument is presumably the size of the
    detector's internal buffer in seconds - confirm against sherpa_onnx.pas. }
  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);
  try
    AllSpeechSegment := nil;
    AllSamples := nil;
    Offset := 0;

    { Feed the wave one full window at a time. Samples after the last full
      window (fewer than WindowSize) are not passed to the detector. }
    while Offset + WindowSize <= Length(Wave.Samples) do
      begin
        Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
        Inc(Offset, WindowSize);

        { Drain every segment that became available after this window. }
        while not Vad.IsEmpty do
          begin
            SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

            SpeechSegment := Vad.Front();
            Vad.Pop();
            AllSpeechSegment[Length(AllSpeechSegment)-1] := SpeechSegment;

            { SpeechSegment.Start is a sample index; convert to seconds. }
            Start := SpeechSegment.Start / SampleRate;
            Duration := Length(SpeechSegment.Samples) / SampleRate;
            WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
          end;
      end;

    { NOTE(review): a segment still buffered inside the detector when the
      input ends is not retrieved here. If the detector API offers a flush
      call, it should be invoked at this point and the queue drained once
      more - TODO confirm against sherpa_onnx.pas. }

    { First pass: total number of speech samples, to size the output once. }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
      Inc(N, Length(SpeechSegment.Samples));

    SetLength(AllSamples, N);

    { Second pass: copy the samples of every segment, in order. }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
      begin
        for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
          begin
            AllSamples[N] := SpeechSegment.Samples[I];
            Inc(N);
          end;
      end;

    SherpaOnnxWriteWave('./lei-jun-test-no-silence.wav', AllSamples, SampleRate);
    WriteLn('Saved to ./lei-jun-test-no-silence.wav');
  finally
    { The detector is a class instance created with Create above; release it
      even if one of the calls in the try block raises. }
    Vad.Free;
  end;

end.
42 changes: 42 additions & 0 deletions pascal-api-examples/vad/run-remove-silence.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Build sherpa-onnx (only when no C API shared library is installed under
# build/install/lib yet), download the silero VAD model and a test wave file,
# compile remove_silence.pas with fpc and run the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The ./-relative paths below (model, wave file, .pas source, compiled binary)
# all refer to this script's directory, so switch to it explicitly instead of
# relying on the caller's working directory.
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Build only if none of the platform-specific C API libraries exists
# (.dylib for macOS, .so for Linux, .dll for Windows).
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the target file name, which would then pass the -f checks here
# on the next run.
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# -Fu: unit search path (sherpa_onnx.pas); -Fl: library search path.
fpc \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./remove_silence.pas

# Let the dynamic loader find the shared library at run time
# (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./remove_silence
Loading

0 comments on commit df322a1

Please sign in to comment.