Skip to content

Commit

Permalink
Add a C++ example to show streaming VAD + non-streaming ASR. (#420)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Nov 11, 2023
1 parent 3c1ea99 commit 68f0e59
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 1 deletion.
1 change: 1 addition & 0 deletions cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def build_extension(self, ext: setuptools.extension.Extension):
binaries += ["sherpa-onnx-offline-websocket-server"]
binaries += ["sherpa-onnx-online-websocket-client"]
binaries += ["sherpa-onnx-vad-microphone"]
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]

if is_windows():
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def get_binaries_to_install():
binaries += ["sherpa-onnx-offline-websocket-server"]
binaries += ["sherpa-onnx-online-websocket-client"]
binaries += ["sherpa-onnx-vad-microphone"]
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]
if is_windows():
binaries += ["kaldi-native-fbank-core.dll"]
Expand Down
6 changes: 6 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
microphone.cc
)

# Example executable: streaming VAD + non-streaming ASR on microphone input.
# Built from its own main source plus the shared microphone.cc helper.
add_executable(sherpa-onnx-vad-microphone-offline-asr
sherpa-onnx-vad-microphone-offline-asr.cc
microphone.cc
)

if(BUILD_SHARED_LIBS)
set(PA_LIB portaudio)
else()
Expand All @@ -235,6 +240,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
sherpa-onnx-microphone
sherpa-onnx-microphone-offline
sherpa-onnx-vad-microphone
sherpa-onnx-vad-microphone-offline-asr
)
foreach(exe IN LISTS exes)
target_link_libraries(${exe} ${PA_LIB} sherpa-onnx-core)
Expand Down
199 changes: 199 additions & 0 deletions sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
//
// Copyright (c) 2022-2023 Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex> // NOLINT
#include <vector>

#include "portaudio.h" // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"

// Shutdown flag. It is set by the SIGINT handler and polled by both the audio
// callback and the main loop, i.e. it is written and read from different
// execution contexts without holding `mutex`. A plain bool would be a data
// race there, so an atomic is used; `stop = true`, `!stop` and `stop ? a : b`
// keep working unchanged.
std::atomic<bool> stop{false};

// Guards every access to `buffer` (audio callback vs. main loop).
std::mutex mutex;

// Samples captured by the audio callback and not yet consumed by the main
// loop. Constructed with 16000 * 60, i.e. presumably a capacity of 60 seconds
// at the 16 kHz rate main() opens the stream with.
sherpa_onnx::CircularBuffer buffer(16000 * 60);

// PortAudio stream callback: appends the freshly captured samples to the
// global `buffer`.
//
// @param input_buffer      Captured samples; interpreted as floats (main()
//                          opens the stream with paFloat32 and one channel).
// @param frames_per_buffer Number of frames available in `input_buffer`.
// @param user_data         Unused.
// @return paComplete once `stop` has been set, paContinue otherwise.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  const auto *captured = reinterpret_cast<const float *>(input_buffer);

  // `buffer` is shared with the main loop, so it is only touched under
  // `mutex`.
  std::lock_guard<std::mutex> guard(mutex);
  buffer.Push(captured, frames_per_buffer);

  if (stop) {
    return paComplete;
  }
  return paContinue;
}

// SIGINT handler: raises the global `stop` flag so that the main loop and the
// audio callback wind down, then reports the interruption on stderr.
//
// NOTE(review): stdio functions are not async-signal-safe; consider moving
// the message out of the handler and printing it from main() after the loop.
static void Handler(int32_t /*sig*/) {
  stop = true;
  fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
}

// Entry point: records from the default input device, cuts the audio into
// speech segments with a streaming VAD and decodes each segment with a
// non-streaming recognizer, until Ctrl + C is pressed.
//
// Returns 0 after a clean shutdown and -1 if the VAD or ASR configuration
// fails validation. Usage errors, a missing input device and PortAudio
// failures terminate the process via exit(EXIT_FAILURE).
int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.
Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.
(1) Transducer from icefall
./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx
(2) Paraformer from FunASR
./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--tokens=/path/to/tokens.txt \
--paraformer=/path/to/model.onnx \
--num-threads=1
(3) Whisper models
./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
--whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
--tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
--num-threads=1
)usage";

  // Both configs register their command-line options on the same parser.
  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig vad_config;

  sherpa_onnx::OfflineRecognizerConfig asr_config;

  vad_config.Register(&po);
  asr_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    // Only named options are accepted; positional arguments are an error.
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in asr_config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  // NOTE(review): presumably initializes PortAudio for the lifetime of `mic`
  // -- confirm against microphone.h.
  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  fprintf(stderr, "Num devices: %d\n", num_devices);

  PaStreamParameters param;

  param.device = Pa_GetDefaultInputDevice();
  if (param.device == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "Use default device: %d\n", param.device);

  const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  if (info == nullptr) {
    // Pa_GetDeviceInfo() returns NULL for an invalid device index; bail out
    // instead of dereferencing it below.
    fprintf(stderr, "Failed to get info for device: %d\n", param.device);
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, " Name: %s\n", info->name);
  fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels);

  // Mono float samples: RecordCallback() reinterprets the input as float.
  param.channelCount = 1;
  param.sampleFormat = paFloat32;

  param.suggestedLatency = info->defaultLowInputLatency;
  param.hostApiSpecificStreamInfo = nullptr;
  const float sample_rate = 16000;

  PaStream *stream;
  PaError err =
      Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
                    sample_rate,
                    0,          // frames per buffer
                    paClipOff,  // we won't output out of range samples
                                // so don't bother clipping them
                    RecordCallback, nullptr);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  err = Pa_StartStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  fprintf(stderr, "Started. Please speak\n");

  const int32_t window_size = vad_config.silero_vad.window_size;
  int32_t index = 0;

  // Scratch storage for the complete windows drained from `buffer` in one
  // iteration; hoisted out of the loop so its capacity is reused.
  std::vector<float> pending;

  while (!stop) {
    pending.clear();
    {
      // Keep the critical section short: only move samples out of the shared
      // buffer here. The audio callback takes the same mutex, so running VAD
      // inference under the lock would stall audio capture.
      std::lock_guard<std::mutex> lock(mutex);

      while (buffer.Size() >= window_size) {
        std::vector<float> window = buffer.Get(buffer.Head(), window_size);
        buffer.Pop(window_size);
        pending.insert(pending.end(), window.begin(), window.end());
      }
    }

    // Feed the VAD exactly one window per call, as before, but without
    // holding the mutex. `pending` always holds a whole number of windows.
    const float *cursor = pending.data();
    const float *const pending_end = cursor + pending.size();
    for (; cursor != pending_end; cursor += window_size) {
      vad->AcceptWaveform(cursor, window_size);
    }

    // Decode every speech segment the VAD has finished so far.
    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(sample_rate, segment.samples.data(),
                        segment.samples.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      if (!result.text.empty()) {
        fprintf(stderr, "%2d: %s\n", index, result.text.c_str());
        ++index;
      }
      vad->Pop();
    }

    Pa_Sleep(100);  // sleep for 100ms
  }

  err = Pa_CloseStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  return 0;
}
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/sherpa-onnx-vad-microphone.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
0, // frames per buffer
paClipOff, // we won't output out of range samples
// so don't bother clipping them
RecordCallback, &config.silero_vad.window_size);
RecordCallback, nullptr);
if (err != paNoError) {
fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
exit(EXIT_FAILURE);
Expand Down

0 comments on commit 68f0e59

Please sign in to comment.