From 9a2394c1a8a45bbd88ea07e5c31f5a50be078720 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Fri, 15 Sep 2023 19:45:33 +0800
Subject: [PATCH] Add VadModel

Add an abstract VadModel interface together with a SileroVadModel
implementation, which is what VadModel::Create() returns for now.
SileroVadModel forwards to a private Impl class; IsSpeech() is still a
stub that always returns true and Reset() does nothing yet.

Also add a sample_rate field (default 16000) to SileroVadModelConfig and
list the two new source files in CMakeLists.txt.

---
Note: the blob ids on the "index" lines of the four new files were
computed before the review fixups in this revision and are informational
only.

 sherpa-onnx/csrc/CMakeLists.txt            |  2 ++
 sherpa-onnx/csrc/silero-vad-model-config.h |  3 ++
 sherpa-onnx/csrc/silero-vad-model.cc       | 37 +++++++++++++++++++++++
 sherpa-onnx/csrc/silero-vad-model.h        | 38 ++++++++++++++++++++++++
 sherpa-onnx/csrc/vad-model.cc              | 16 ++++++++++
 sherpa-onnx/csrc/vad-model.h               | 36 ++++++++++++++++++++++
 6 files changed, 132 insertions(+)
 create mode 100644 sherpa-onnx/csrc/silero-vad-model.cc
 create mode 100644 sherpa-onnx/csrc/silero-vad-model.h
 create mode 100644 sherpa-onnx/csrc/vad-model.cc
 create mode 100644 sherpa-onnx/csrc/vad-model.h

diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt
index 7b9223c62..46377c1bd 100644
--- a/sherpa-onnx/csrc/CMakeLists.txt
+++ b/sherpa-onnx/csrc/CMakeLists.txt
@@ -67,6 +67,7 @@ set(sources
   resample.cc
   session.cc
   silero-vad-model-config.cc
+  silero-vad-model.cc
   slice.cc
   stack.cc
   symbol-table.cc
@@ -75,6 +76,7 @@ set(sources
   unbind.cc
   utils.cc
   vad-model-config.cc
+  vad-model.cc
   wave-reader.cc
 )
 
diff --git a/sherpa-onnx/csrc/silero-vad-model-config.h b/sherpa-onnx/csrc/silero-vad-model-config.h
index 3c9653dd7..131554aba 100644
--- a/sherpa-onnx/csrc/silero-vad-model-config.h
+++ b/sherpa-onnx/csrc/silero-vad-model-config.h
@@ -25,6 +25,9 @@ struct SileroVadModelConfig {
   // 256, 512, 768 samples for 800 Hz
   int window_size = 1536;  // in samples
 
+  // support only 16000 and 8000
+  int32_t sample_rate = 16000;
+
   SileroVadModelConfig() = default;
 
   void Register(ParseOptions *po);
diff --git a/sherpa-onnx/csrc/silero-vad-model.cc b/sherpa-onnx/csrc/silero-vad-model.cc
new file mode 100644
index 000000000..33d32d12e
--- /dev/null
+++ b/sherpa-onnx/csrc/silero-vad-model.cc
@@ -0,0 +1,37 @@
+// sherpa-onnx/csrc/silero-vad-model.cc
+//
+// Copyright (c) 2023 Xiaomi Corporation
+
+#include "sherpa-onnx/csrc/silero-vad-model.h"
+
+namespace sherpa_onnx {
+
+// Private implementation of SileroVadModel (pimpl idiom).
+class SileroVadModel::Impl {
+ public:
+  explicit Impl(const VadModelConfig &config) : config_(config) {}
+
+  // Placeholder: there is no internal state to reset yet.
+  void Reset() {}
+
+  // Placeholder: always returns true; samples and n are ignored for now.
+  bool IsSpeech(const float *samples, int32_t n) { return true; }
+
+ private:
+  VadModelConfig config_;
+};
+
+SileroVadModel::SileroVadModel(const VadModelConfig &config)
+    : impl_(std::make_unique<Impl>(config)) {}
+
+// Defined out of line, where Impl is a complete type, as required to destroy
+// the std::unique_ptr<Impl> member.
+SileroVadModel::~SileroVadModel() = default;
+
+void SileroVadModel::Reset() { impl_->Reset(); }
+
+bool SileroVadModel::IsSpeech(const float *samples, int32_t n) {
+  return impl_->IsSpeech(samples, n);
+}
+
+}  // namespace sherpa_onnx
diff --git a/sherpa-onnx/csrc/silero-vad-model.h b/sherpa-onnx/csrc/silero-vad-model.h
new file mode 100644
index 000000000..298749611
--- /dev/null
+++ b/sherpa-onnx/csrc/silero-vad-model.h
@@ -0,0 +1,38 @@
+// sherpa-onnx/csrc/silero-vad-model.h
+//
+// Copyright (c) 2023 Xiaomi Corporation
+#ifndef SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
+#define SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
+
+#include <memory>
+
+#include "sherpa-onnx/csrc/vad-model.h"
+
+namespace sherpa_onnx {
+
+// VadModel implementation; every call is forwarded to a private Impl.
+class SileroVadModel : public VadModel {
+ public:
+  explicit SileroVadModel(const VadModelConfig &config);
+  ~SileroVadModel() override;
+
+  // reset the internal model states
+  void Reset() override;
+
+  /**
+   * @param samples Pointer to a 1-d array containing audio samples.
+   *                Each sample should be normalized to the range [-1, 1].
+   * @param n Number of samples.
+   *
+   * @return Return true if speech is detected. Return false otherwise.
+   */
+  bool IsSpeech(const float *samples, int32_t n) override;
+
+ private:
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
diff --git a/sherpa-onnx/csrc/vad-model.cc b/sherpa-onnx/csrc/vad-model.cc
new file mode 100644
index 000000000..47d3fc684
--- /dev/null
+++ b/sherpa-onnx/csrc/vad-model.cc
@@ -0,0 +1,16 @@
+// sherpa-onnx/csrc/vad-model.cc
+//
+// Copyright (c) 2023 Xiaomi Corporation
+
+#include "sherpa-onnx/csrc/vad-model.h"
+
+#include "sherpa-onnx/csrc/silero-vad-model.h"
+
+namespace sherpa_onnx {
+
+std::unique_ptr<VadModel> VadModel::Create(const VadModelConfig &config) {
+  // TODO(fangjun): Support other VAD models.
+  return std::make_unique<SileroVadModel>(config);
+}
+
+}  // namespace sherpa_onnx
diff --git a/sherpa-onnx/csrc/vad-model.h b/sherpa-onnx/csrc/vad-model.h
new file mode 100644
index 000000000..8a90b055c
--- /dev/null
+++ b/sherpa-onnx/csrc/vad-model.h
@@ -0,0 +1,36 @@
+// sherpa-onnx/csrc/vad-model.h
+//
+// Copyright (c) 2023 Xiaomi Corporation
+#ifndef SHERPA_ONNX_CSRC_VAD_MODEL_H_
+#define SHERPA_ONNX_CSRC_VAD_MODEL_H_
+
+#include <memory>
+
+#include "sherpa-onnx/csrc/vad-model-config.h"
+
+namespace sherpa_onnx {
+
+// Abstract interface for voice activity detection (VAD) models.
+class VadModel {
+ public:
+  virtual ~VadModel() = default;
+
+  // Create a model from config. Currently this is always a SileroVadModel.
+  static std::unique_ptr<VadModel> Create(const VadModelConfig &config);
+
+  // reset the internal model states
+  virtual void Reset() = 0;
+
+  /**
+   * @param samples Pointer to a 1-d array containing audio samples.
+   *                Each sample should be normalized to the range [-1, 1].
+   * @param n Number of samples.
+   *
+   * @return Return true if speech is detected. Return false otherwise.
+   */
+  virtual bool IsSpeech(const float *samples, int32_t n) = 0;
+};
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_CSRC_VAD_MODEL_H_