Skip to content

Commit

Permalink
Add streaming CTC ASR APIs for node-addon-api (k2-fsa#867)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored May 13, 2024
1 parent f730a93 commit 8168921
Show file tree
Hide file tree
Showing 15 changed files with 445 additions and 31 deletions.
20 changes: 11 additions & 9 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,6 @@ set -ex
d=nodejs-addon-examples
echo "dir: $d"
cd $d
npm install --verbose
git status
ls -lh
ls -lh node_modules

export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
Expand All @@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node test_asr_streaming_transducer.js

rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

node ./test_asr_streaming_ctc.js

# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js

rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
26 changes: 16 additions & 10 deletions .github/workflows/test-nodejs-addon-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,23 @@ jobs:
./node_modules/.bin/cmake-js compile --log-level verbose
- name: Test streaming transducer
- name: Run tests
shell: bash
run: |
export PATH=$PWD/build/install/lib:$PATH
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
cd scripts/node-addon-api
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node test/test_asr_streaming_transducer.js
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
d=nodejs-addon-examples
cd $d
files=$(ls *.js)
echo $files
for f in ${files[@]}; do
echo $f
sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
done
cd ..
cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
cp -v scripts/node-addon-api/lib/*.js $d/
cp -v ./build/install/lib/lib* $d/
.github/scripts/test-nodejs-addon-npm.sh
15 changes: 15 additions & 0 deletions .github/workflows/test-nodejs-addon-npm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,19 @@ jobs:
- name: Run tests
shell: bash
run: |
d=nodejs-addon-examples
echo "dir: $d"
cd $d
npm install --verbose
git status
ls -lh
ls -lh node_modules
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
cd ../
.github/scripts/test-nodejs-addon-npm.sh
30 changes: 24 additions & 6 deletions nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
```

# Voice activity detection (VAD)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx


# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_vad_microphone.js
```

## Streaming speech recognition with zipformer transducer

```bash
Expand All @@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

node ./test_asr_streaming_transducer.js

# To run the test with microphone, you need to install the package naudiodon2
# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_asr_streaming_transducer_microphone.js
```

# VAD
## Streaming speech recognition with zipformer CTC

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

node ./test_asr_streaming_ctc.js

# To run the test with microphone, you need to install the package naudiodon2
# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js

# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_vad_microphone.js
node ./test_asr_streaming_ctc_microphone.js
node ./test_asr_streaming_ctc_hlg_microphone.js
```

55 changes: 55 additions & 0 deletions nodejs-addon-examples/test_asr_streaming_ctc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;


// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// Configuration for a streaming (online) recognizer that uses the
// zipformer2 CTC model. All paths are relative to the current working
// directory and point into the extracted model directory.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');

// The timed region covers creating the stream, reading the wave file,
// feeding the samples and decoding. Constructing the recognizer is excluded.
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append 0.4 seconds of zero-valued samples after the real audio.
// NOTE(review): presumably this lets the recognizer emit the final tokens of
// the utterance - confirm.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

// Keep decoding until the recognizer reports that the stream is not ready.
while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Declare `result` explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Duration of the wave file only; the tail padding is not counted.
const duration = wave.samples.length / wave.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
58 changes: 58 additions & 0 deletions nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;


// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// Configuration for a streaming (online) recognizer that uses the
// zipformer2 CTC model and additionally passes an HLG.fst graph through
// `ctcFstDecoderConfig`. All paths are relative to the current working
// directory and point into the extracted model directory.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'ctcFstDecoderConfig': {
    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');

// The timed region covers creating the stream, reading the wave file,
// feeding the samples and decoding. Constructing the recognizer is excluded.
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append 0.4 seconds of zero-valued samples after the real audio.
// NOTE(review): presumably this lets the recognizer emit the final tokens of
// the utterance - confirm.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

// Keep decoding until the recognizer reports that the stream is not ready.
while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Declare `result` explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Duration of the wave file only; the tail padding is not counted.
const duration = wave.samples.length / wave.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
89 changes: 89 additions & 0 deletions nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Creates the streaming recognizer used by this example.
//
// The configuration selects the zipformer2 CTC model and its tokens file,
// passes an HLG.fst graph via `ctcFstDecoderConfig`, and turns on endpoint
// detection with the three rule thresholds listed below. All paths are
// relative to the current working directory.
//
// Returns a new `sherpa_onnx.OnlineRecognizer` built from that configuration.
function createOnlineRecognizer() {
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  const modelConfig = {
    zipformer2Ctc: {
      model:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    tokens:
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
  };

  const ctcFstDecoderConfig = {
    graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  };

  return new sherpa_onnx.OnlineRecognizer({
    featConfig,
    modelConfig,
    ctcFstDecoderConfig,
    enableEndpoint: true,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  });
}

// State shared with the audio 'data' handler: the most recently seen text
// and the index of the segment currently being recognized.
let lastText = '';
let segmentIndex = 0;

// One recognizer and one stream are used for the whole session.
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// Capture settings for the microphone. The sample rate is taken from the
// recognizer's feature configuration so both sides agree.
const audioInputOptions = {
  channelCount: 1,
  // Close the stream if an audio error is detected; if set to false the
  // error is only logged.
  closeOnError: true,
  // Use -1 or omit the deviceId to select the default device.
  deviceId: -1,
  sampleFormat: portAudio.SampleFormatFloat32,
  sampleRate: recognizer.config.featConfig.sampleRate,
};

const ai = new portAudio.AudioIO({inOptions: audioInputOptions});

// NOTE(review): the argument 50 is presumably a maximum line width for the
// printed text - confirm against the Display implementation.
const display = new sherpa_onnx.Display(50);

// Handles one chunk of captured audio: feeds it to the recognizer stream,
// decodes everything that is ready, prints the text when it changes, and
// resets the stream when an endpoint is detected.
ai.on('data', (data) => {
  // The input is opened with SampleFormatFloat32, so the bytes are read as
  // 32-bit floats. A Node.js Buffer can be a view into a larger ArrayBuffer,
  // so restrict the Float32Array to the bytes that belong to this chunk
  // instead of wrapping the whole underlying `data.buffer`.
  // NOTE(review): assumes `data` is a Node.js Buffer (or other Uint8Array) -
  // confirm against naudiodon2.
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const numSamples = Math.floor(data.byteLength / bytesPerSample);
  const samples = data.byteOffset % bytesPerSample === 0 ?
      new Float32Array(data.buffer, data.byteOffset, numSamples) :
      // A misaligned view cannot be wrapped directly; copy the bytes first.
      new Float32Array(data.buffer.slice(
          data.byteOffset, data.byteOffset + numSamples * bytesPerSample));

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  // Query the endpoint state before reading the result so that both refer to
  // the same decoding state.
  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Only print when there is text and it differs from what was last shown.
  if (text.length > 0 && lastText !== text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // Move on to a new segment only if the finished one produced text.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

// Called when the audio input emits 'close': logs a message, then calls
// free() on the stream and on the recognizer, in that order.
function releaseResources() {
  console.log('Free resources');
  stream.free();
  recognizer.free();
}

ai.on('close', releaseResources);

// Start capturing audio; chunks are delivered to the 'data' handler.
ai.start();
console.log('Started! Please speak');
Loading

0 comments on commit 8168921

Please sign in to comment.