Skip to content

Commit

Permalink
Add VadModel
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Sep 15, 2023
1 parent 4767a27 commit 9a2394c
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 0 deletions.
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ set(sources
resample.cc
session.cc
silero-vad-model-config.cc
silero-vad-model.cc
slice.cc
stack.cc
symbol-table.cc
Expand All @@ -75,6 +76,7 @@ set(sources
unbind.cc
utils.cc
vad-model-config.cc
vad-model.cc
wave-reader.cc
)

Expand Down
3 changes: 3 additions & 0 deletions sherpa-onnx/csrc/silero-vad-model-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ struct SileroVadModelConfig {
// 256, 512, 768 samples for 8000 Hz
int window_size = 1536; // in samples

// support only 16000 and 8000
int32_t sample_rate = 16000;

SileroVadModelConfig() = default;

void Register(ParseOptions *po);
Expand Down
32 changes: 32 additions & 0 deletions sherpa-onnx/csrc/silero-vad-model.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// sherpa-onnx/csrc/silero-vad-model.cc
//
// Copyright (c) 2023 Xiaomi Corporation

#include "sherpa-onnx/csrc/silero-vad-model.h"

namespace sherpa_onnx {

// Private implementation of SileroVadModel.
//
// NOTE: This is currently a placeholder. It only stores the configuration;
// Reset() does nothing and IsSpeech() ignores its input.
class SileroVadModel::Impl {
 public:
  // Keeps a copy of `config`.
  //
  // Marked explicit so that a VadModelConfig is never silently converted
  // into an Impl.
  explicit Impl(const VadModelConfig &config) : config_(config) {}

  // No-op for now: this class holds no state besides the configuration.
  void Reset() {}

  // Placeholder: always returns true, regardless of the samples passed in.
  // The parameter names are commented out to avoid unused-parameter warnings.
  bool IsSpeech(const float * /*samples*/, int32_t /*n*/) { return true; }

 private:
  VadModelConfig config_;
};

SileroVadModel::SileroVadModel(const VadModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}

// Defined out of line in this file, where Impl is a complete type; the
// destructor of std::unique_ptr<Impl> requires the complete type.
SileroVadModel::~SileroVadModel() = default;

// Forwards the reset request to the private implementation.
void SileroVadModel::Reset() { impl_->Reset(); }

// Forwards the samples to the private implementation and returns its verdict.
bool SileroVadModel::IsSpeech(const float *samples, int32_t n) {
  const bool is_speech = impl_->IsSpeech(samples, n);
  return is_speech;
}

} // namespace sherpa_onnx
37 changes: 37 additions & 0 deletions sherpa-onnx/csrc/silero-vad-model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// sherpa-onnx/csrc/silero-vad-model.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
#define SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_

#include <memory>

#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

// Silero implementation of the VadModel interface.
//
// The implementation lives in a private Impl class that is only
// forward-declared here and owned through a std::unique_ptr.
class SileroVadModel : public VadModel {
 public:
  // Marked explicit so that a VadModelConfig is never implicitly converted
  // into a SileroVadModel.
  explicit SileroVadModel(const VadModelConfig &config);

  // Declared here and defined in the .cc file, because destroying
  // std::unique_ptr<Impl> requires Impl to be a complete type.
  ~SileroVadModel() override;

  // reset the internal model states
  void Reset() override;

  /**
   * @param samples Pointer to a 1-d array containing audio samples.
   *                Each sample should be normalized to the range [-1, 1].
   * @param n Number of samples.
   *
   * @return Return true if speech is detected. Return false otherwise.
   */
  bool IsSpeech(const float *samples, int32_t n) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
16 changes: 16 additions & 0 deletions sherpa-onnx/csrc/vad-model.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// sherpa-onnx/csrc/vad-model.cc
//
// Copyright (c) 2023 Xiaomi Corporation

#include "sherpa-onnx/csrc/vad-model.h"

#include "sherpa-onnx/csrc/silero-vad-model.h"

namespace sherpa_onnx {

// Factory for VAD models. At present it always builds a SileroVadModel
// from `config` and hands ownership to the caller.
std::unique_ptr<VadModel> VadModel::Create(const VadModelConfig &config) {
  // TODO(fangjun): Support other VAD models.
  std::unique_ptr<VadModel> model = std::make_unique<SileroVadModel>(config);
  return model;
}

} // namespace sherpa_onnx
34 changes: 34 additions & 0 deletions sherpa-onnx/csrc/vad-model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// sherpa-onnx/csrc/vad-model.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_VAD_MODEL_H_
#define SHERPA_ONNX_CSRC_VAD_MODEL_H_

#include <memory>

#include "sherpa-onnx/csrc/vad-model-config.h"

namespace sherpa_onnx {

// Abstract interface for voice activity detection (VAD) models.
// Instances are obtained through Create() and used through the pure virtual
// methods declared below.
class VadModel {
public:
// Virtual so that a derived model can be destroyed through a VadModel pointer.
virtual ~VadModel() = default;

// Factory: constructs a concrete VadModel from `config` and returns an
// owning pointer to it.
static std::unique_ptr<VadModel> Create(const VadModelConfig &config);

// Reset the internal model states.
virtual void Reset() = 0;

/**
* Decide whether the given audio samples contain speech.
*
* @param samples Pointer to a 1-d array containing audio samples.
* Each sample should be normalized to the range [-1, 1].
* @param n Number of samples.
*
* @return Return true if speech is detected. Return false otherwise.
*/
virtual bool IsSpeech(const float *samples, int32_t n) = 0;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_VAD_MODEL_H_

0 comments on commit 9a2394c

Please sign in to comment.