Skip to content

Commit

Permalink
Add node-addon-api for VAD (#864)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored May 11, 2024
1 parent 677bc1d commit eee5d8a
Show file tree
Hide file tree
Showing 15 changed files with 914 additions and 10 deletions.
1 change: 1 addition & 0 deletions nodejs-addon-examples/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
crash.log
9 changes: 9 additions & 0 deletions nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js

node ./test_asr_streaming_transducer_microphone.js
```

# VAD

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_microphone.js
```

4 changes: 2 additions & 2 deletions nodejs-addon-examples/test_asr_streaming_transducer.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ console.log('Started')
let start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.samples, wave.sampleRate);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform(tailPadding, wave.sampleRate);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
recognizer.decode(stream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50);
ai.on('data', data => {
const samples = new Float32Array(data.buffer);

stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate);
stream.acceptWaveform(
{sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

while (recognizer.isReady(stream)) {
recognizer.decode(stream);
Expand Down
88 changes: 88 additions & 0 deletions nodejs-addon-examples/test_vad_microphone.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)

const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Create the voice activity detector used by this example.
 *
 * Please download silero_vad.onnx from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 * and place it in the current directory before running.
 *
 * @returns {sherpa_onnx.Vad} a detector built from the config below
 */
function createVad() {
  // Settings specific to the silero VAD model.
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  const vadConfig = {
    sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  // The second constructor argument is the buffer size in seconds.
  return new sherpa_onnx.Vad(vadConfig, 60);
}

// Declared with `const`: assigning to an undeclared name creates an implicit
// global in sloppy mode and throws a ReferenceError in strict mode or in an
// ES module.
const vad = createVad();

// Circular buffer that the 'data' handler pushes microphone samples into and
// drains one VAD window at a time. Its capacity is given as
// seconds * sampleRate.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);


// Options for the input (capture) side of the audio stream.
const inOptions = {
  channelCount: 1,
  // Close the stream if an audio error is detected; if set to false then
  // just log the error.
  closeOnError: true,
  // Use -1 or omit the deviceId to select the default device.
  deviceId: -1,
  sampleFormat: portAudio.SampleFormatFloat32,
  // Capture at the same sample rate the VAD was configured with.
  sampleRate: vad.config.sampleRate,
};

const ai = new portAudio.AudioIO({inOptions});

// True once "Detected speech" has been logged for the current stretch of
// speech; reset as soon as the VAD no longer reports speech, so the message
// is printed once per stretch rather than once per window.
let printed = false;
// Number of speech segments saved so far; used in log lines and filenames.
let index = 0;

ai.on('data', (data) => {
  const windowSize = vad.config.sileroVad.windowSize;

  // `data` is a typed-array view (presumably a Node.js Buffer emitted by the
  // audio stream). The ArrayBuffer behind such a view is not guaranteed to
  // match the view exactly: it can be larger and the view can start at a
  // non-zero offset. Restrict the Float32Array to the bytes of `data` itself
  // instead of wrapping the whole underlying ArrayBuffer.
  const incoming = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT));
  buffer.push(incoming);

  // Feed the VAD one fixed-size window at a time.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);

    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every finished speech segment and save each one to a wave file.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const filename = `${index}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

// Handler registered for the 'close' event of the audio stream.
function onClose() {
  console.log('Free resources');
}

ai.on('close', onClose);

// Start capturing from the microphone, then prompt the user.
ai.start();
console.log('Started! Please speak');
2 changes: 1 addition & 1 deletion python-api-examples/offline-tts-play.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
--sid=2 \
--output-filename=./test-2.wav \
Expand Down
2 changes: 1 addition & 1 deletion python-api-examples/offline-tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
--sid=2 \
--output-filename=./test-2.wav \
Expand Down
2 changes: 2 additions & 0 deletions scripts/node-addon-api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC})
set(srcs
src/sherpa-onnx-node-addon-api.cc
src/streaming-asr.cc
src/vad.cc
src/wave-reader.cc
src/wave-writer.cc
)

if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
Expand Down
4 changes: 4 additions & 0 deletions scripts/node-addon-api/lib/sherpa-onnx.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
const addon = require('./addon.js')
const streaming_asr = require('./streaming-asr.js');
const vad = require('./vad.js');

// Public API of the package. Each entry re-exports a name from one of the
// modules required above: addon.js (readWave, writeWave), streaming-asr.js
// (OnlineRecognizer, Display) and vad.js (Vad, CircularBuffer).
module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
readWave: addon.readWave,
writeWave: addon.writeWave,
Display: streaming_asr.Display,
Vad: vad.Vad,
CircularBuffer: vad.CircularBuffer,
}
7 changes: 4 additions & 3 deletions scripts/node-addon-api/lib/streaming-asr.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ class OnlineStream {
this.handle = handle;
}

// obj is {samples: samples, sampleRate: sampleRate}
// samples is a float32 array containing samples in the range [-1, 1]
acceptWaveform(samples, sampleRate) {
addon.acceptWaveformOnline(
this.handle, {samples: samples, sampleRate: sampleRate})
// sampleRate is a number
acceptWaveform(obj) {
addon.acceptWaveformOnline(this.handle, obj)
}

inputFinished() {
Expand Down
88 changes: 88 additions & 0 deletions scripts/node-addon-api/lib/vad.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
const addon = require('./addon.js');

/**
 * Thin JavaScript wrapper around the circular buffer provided by the native
 * addon. Every method forwards to the matching addon function, passing the
 * native handle created in the constructor as the first argument.
 */
class CircularBuffer {
  /**
   * @param capacity - forwarded unchanged to addon.createCircularBuffer
   */
  constructor(capacity) {
    this.handle = addon.createCircularBuffer(capacity);
  }

  /**
   * Append samples to the buffer.
   *
   * @param samples - a float32 array
   */
  push(samples) {
    addon.circularBufferPush(this.handle, samples);
  }

  /**
   * Read n samples starting at startIndex.
   *
   * @param startIndex - position to start reading from
   * @param n - number of samples to read
   * @returns a float32 array
   */
  get(startIndex, n) {
    const {handle} = this;
    return addon.circularBufferGet(handle, startIndex, n);
  }

  /**
   * Remove n samples from the buffer.
   *
   * @param n - number of samples to remove
   * @returns whatever addon.circularBufferPop returns
   */
  pop(n) {
    const {handle} = this;
    return addon.circularBufferPop(handle, n);
  }

  /** @returns the result of addon.circularBufferSize for this buffer */
  size() {
    const {handle} = this;
    return addon.circularBufferSize(handle);
  }

  /** @returns the result of addon.circularBufferHead for this buffer */
  head() {
    const {handle} = this;
    return addon.circularBufferHead(handle);
  }

  /** @returns the result of addon.circularBufferReset for this buffer */
  reset() {
    const {handle} = this;
    return addon.circularBufferReset(handle);
  }
}

/**
 * Thin JavaScript wrapper around the voice activity detector provided by the
 * native addon. Every method forwards to an addon function, passing the
 * native handle created in the constructor as the first argument.
 */
class Vad {
  /**
   * Example config:
   *
   *   config = {
   *     sileroVad: {
   *       model: "./silero_vad.onnx",
   *       threshold: 0.5,
   *     }
   *   }
   *
   * @param config - detector configuration; also kept as this.config
   * @param bufferSizeInSeconds - forwarded unchanged to the addon
   */
  constructor(config, bufferSizeInSeconds) {
    this.handle =
        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
    this.config = config;
  }

  /**
   * Feed samples to the detector.
   *
   * @param samples - audio samples, forwarded unchanged to the addon
   */
  acceptWaveform(samples) {
    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /** @returns the result of addon.voiceActivityDetectorIsEmpty */
  isEmpty() {
    return addon.voiceActivityDetectorIsEmpty(this.handle);
  }

  /** @returns the result of addon.voiceActivityDetectorIsDetected */
  isDetected() {
    return addon.voiceActivityDetectorIsDetected(this.handle);
  }

  /** Remove the segment at the front of the detector's queue. */
  pop() {
    addon.voiceActivityDetectorPop(this.handle);
  }

  /**
   * Forward to the addon's clear function.
   *
   * Uses the camelCase key that every other addon call in this file follows.
   * The previous `VoiceActivityDetectorClearWrapper` did not match that
   * convention and looks like the name of the C++ function rather than the
   * key it is exported under.
   */
  clear() {
    addon.voiceActivityDetectorClear(this.handle);
  }

  /**
   * Return the segment at the front of the detector's queue, without
   * removing it:
   *
   *   {
   *     samples: a 1-d float32 array,
   *     start: an int32
   *   }
   */
  front() {
    return addon.voiceActivityDetectorFront(this.handle);
  }

  /**
   * Forward to the addon's reset function.
   *
   * Same naming fix as clear(): the camelCase key replaces
   * `VoiceActivityDetectorResetWrapper`.
   *
   * @returns whatever the addon's reset function returns
   */
  reset() {
    return addon.voiceActivityDetectorReset(this.handle);
  }
}

// Export the two wrapper classes defined in this file.
module.exports = {
Vad,
CircularBuffer,
}
4 changes: 4 additions & 0 deletions scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@

void InitStreamingAsr(Napi::Env env, Napi::Object exports);
void InitWaveReader(Napi::Env env, Napi::Object exports);
void InitWaveWriter(Napi::Env env, Napi::Object exports);
void InitVad(Napi::Env env, Napi::Object exports);

// Addon initialization: passes `exports` to each component's Init* function
// (streaming ASR, wave reader, wave writer, VAD) and returns it.
// NOTE(review): presumably each Init* function attaches its own functions to
// `exports`; their definitions are not visible here.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitStreamingAsr(env, exports);
InitWaveReader(env, exports);
InitWaveWriter(env, exports);
InitVad(env, exports);

return exports;
}
Expand Down
9 changes: 7 additions & 2 deletions scripts/node-addon-api/src/streaming-asr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,13 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
config.provider = p;
}

if (o.Has("debug") && o.Get("debug").IsNumber()) {
config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
if (o.Has("debug") &&
(o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
if (o.Get("debug").IsBoolean()) {
config.debug = o.Get("debug").As<Napi::Boolean>().Value();
} else {
config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
}
}

if (o.Has("modelType") && o.Get("modelType").IsString()) {
Expand Down
Loading

0 comments on commit eee5d8a

Please sign in to comment.