Skip to content

Commit

Permalink
Add WebAssembly for SenseVoice (#1158)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jul 21, 2024
1 parent c3260ef commit 70d1435
Show file tree
Hide file tree
Showing 21 changed files with 383 additions and 351 deletions.
7 changes: 7 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ ls -lh
ls -lh node_modules

# offline asr
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

node ./test-offline-sense-voice.js
rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/test-nodejs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ${{ matrix.os }}-${{ matrix.build_type }}-wasm-nodejs

- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14

Expand Down Expand Up @@ -77,6 +82,10 @@ jobs:
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cmake --version
./build-wasm-simd-nodejs.sh
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
Expand Down
15 changes: 15 additions & 0 deletions nodejs-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
```

## ./test-offline-sense-voice.js

[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
how to decode a file with a non-streaming SenseVoice model.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

node ./test-offline-sense-voice.js
```

## ./test-offline-transducer.js

[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
Expand Down
26 changes: 0 additions & 26 deletions nodejs-examples/test-offline-nemo-ctc.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,21 @@ function createOfflineRecognizer() {
};

let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: '',
},
nemoCtc: {
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
},
whisper: {
encoder: '',
decoder: '',
language: '',
task: '',
tailPaddings: -1,
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'nemo_ctc',
};

let lmConfig = {
model: '',
scale: 1.0,
};

let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};

return sherpa_onnx.createOfflineRecognizer(config);
Expand Down
26 changes: 0 additions & 26 deletions nodejs-examples/test-offline-paraformer-itn.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,21 @@ function createOfflineRecognizer() {
};

let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
language: '',
task: '',
tailPaddings: -1,
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'paraformer',
};

let lmConfig = {
model: '',
scale: 1.0,
};

let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
ruleFsts: './itn_zh_number.fst',
};
Expand Down
27 changes: 0 additions & 27 deletions nodejs-examples/test-offline-paraformer.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,20 @@ function createOfflineRecognizer() {
};

let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
language: '',
task: '',
tailPaddings: -1,
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'paraformer',
};

let lmConfig = {
model: '',
scale: 1.0,
};

let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};

return sherpa_onnx.createOfflineRecognizer(config);
Expand Down
101 changes: 101 additions & 0 deletions nodejs-examples/test-offline-sense-voice.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)

const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

/**
 * Builds the non-streaming recognizer used by this example.
 *
 * The configuration points at the SenseVoice int8 model and its token table
 * inside `./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17`, expects
 * 16 kHz audio with 80-dimensional features, and decodes with greedy search.
 *
 * @returns {*} Whatever `sherpa_onnx.createOfflineRecognizer` returns for the
 *     assembled configuration.
 */
function createOfflineRecognizer() {
  return sherpa_onnx.createOfflineRecognizer({
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      senseVoice: {
        model:
            './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
        // NOTE(review): an empty language presumably leaves language
        // selection to the model -- confirm against the sherpa-onnx docs.
        language: '',
        useInverseTextNormalization: 1,
      },
      tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
      numThreads: 1,
      debug: 0,
      provider: 'cpu',
    },
    decodingMethod: 'greedy_search',
  });
}


// Create the recognizer and the stream that will receive the audio samples.
// Both are released with free() once decoding has finished.
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

// Test recording located in the test_wavs folder of the model directory.
const waveFilename =
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';

// `reader` receives the raw file bytes and emits a 'format' event carrying the
// WAV header fields; `readable` wraps it so the sample data can be pulled with
// read().
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
// Collects the Float32Array chunks of converted samples until the whole file
// has been read.
const buf = [];

// Validate the WAV header as soon as it has been parsed. Only 16-bit,
// single-channel PCM at the recognizer's configured sample rate is accepted;
// anything else aborts with a descriptive error.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  const expectedSampleRate = recognizer.config.featConfig.sampleRate;

  if (sampleRate !== expectedSampleRate) {
    throw new Error(
        `Only support sampleRate ${expectedSampleRate}. Given ${sampleRate}`);
  }

  // Format tag 1 is the only value treated as PCM here.
  if (audioFormat !== 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels !== 1) {
    // The original interpolated an undefined `channel` identifier, which
    // raised a ReferenceError instead of reporting the channel count.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth !== 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

// Feed the file into the WAV reader in 4 KiB chunks. Once the reader has
// consumed everything, decode the accumulated samples and print the text.
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function() {
      const sampleRate = recognizer.config.featConfig.sampleRate;

      // tail padding: append half a second of zero-valued samples
      buf.push(new Float32Array(sampleRate * 0.5));

      // Concatenate all chunks into one preallocated typed array. Spreading
      // every chunk into a growing plain array copies all earlier samples
      // again on each step, which is quadratic in the file length.
      const totalSamples =
          buf.reduce((count, chunk) => count + chunk.length, 0);
      const flattened = new Float32Array(totalSamples);
      let offset = 0;
      for (const chunk of buf) {
        flattened.set(chunk, offset);
        offset += chunk.length;
      }

      stream.acceptWaveform(sampleRate, flattened);
      recognizer.decode(stream);
      const text = recognizer.getResult(stream).text;
      console.log(text);

      // Release the stream and recognizer explicitly now that decoding is done.
      stream.free();
      recognizer.free();
    });

// Drain every chunk that is currently available, convert its 16-bit integer
// samples to floats by dividing by 32768, and queue the result in `buf`.
readable.on('readable', function() {
  for (let pcmBytes = readable.read(); pcmBytes != null;
       pcmBytes = readable.read()) {
    // View the chunk's bytes as 16-bit integers without copying them.
    const pcm16 = new Int16Array(
        pcmBytes.buffer, pcmBytes.byteOffset,
        pcmBytes.length / Int16Array.BYTES_PER_ELEMENT);

    buf.push(Float32Array.from(pcm16, (sample) => sample / 32768.0));
  }
});
22 changes: 0 additions & 22 deletions nodejs-examples/test-offline-transducer.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,38 +21,16 @@ function createOfflineRecognizer() {
joiner:
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
},
paraformer: {
model: '',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
language: '',
task: '',
tailPaddings: -1,
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'transducer',
};

let lmConfig = {
model: '',
scale: 1.0,
};

let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
Expand Down
4 changes: 0 additions & 4 deletions nodejs-examples/test-offline-tts-en.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx');
function createOfflineTts() {
let offlineTtsVitsModelConfig = {
model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
lexicon: '',
tokens: './vits-piper-en_US-amy-low/tokens.txt',
dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
dictDir: '',
noiseScale: 0.667,
noiseScaleW: 0.8,
lengthScale: 1.0,
Expand All @@ -22,8 +20,6 @@ function createOfflineTts() {

let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: '',
ruleFars: '',
maxNumSentences: 1,
};

Expand Down
3 changes: 0 additions & 3 deletions nodejs-examples/test-offline-tts-zh.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ function createOfflineTts() {
model: './vits-icefall-zh-aishell3/model.onnx',
lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
tokens: './vits-icefall-zh-aishell3/tokens.txt',
dataDir: '',
dictDir: '',
noiseScale: 0.667,
noiseScaleW: 0.8,
lengthScale: 1.0,
Expand All @@ -31,7 +29,6 @@ function createOfflineTts() {
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}


const tts = createOfflineTts();
const speakerId = 66;
const speed = 1.0;
Expand Down
Loading

0 comments on commit 70d1435

Please sign in to comment.