Skip to content

Commit

Permalink
Add streaming ASR examples for Dart API (#1009)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jun 15, 2024
1 parent d945066 commit e307767
Show file tree
Hide file tree
Showing 30 changed files with 1,021 additions and 2 deletions.
26 changes: 25 additions & 1 deletion .github/scripts/test-dart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@ set -ex

cd dart-api-examples

# Streaming ASR examples.
# Each run-*.sh script downloads its model directory (named sherpa-onnx-*),
# runs recognition on a bundled test wave, and the model files are removed
# afterwards to keep CI disk usage low.
pushd streaming-asr

echo '----------streaming zipformer ctc HLG----------'
./run-zipformer-ctc-hlg.sh
rm -rf sherpa-onnx-*

echo '----------streaming zipformer ctc----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*

echo '----------streaming zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

echo '----------streaming NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*

echo '----------streaming paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*

popd # streaming-asr

pushd non-streaming-asr

echo '----------VAD with paraformer----------'
Expand Down Expand Up @@ -34,7 +58,7 @@ echo '----------zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

popd
popd # non-streaming-asr

pushd vad
./run.sh
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/test-dart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ on:
- master
paths:
- '.github/workflows/test-dart.yaml'
- '.github/scripts/test-dart.sh'
- 'dart-api-examples/**'
pull_request:
branches:
- master
paths:
- '.github/workflows/test-dart.yaml'
- '.github/scripts/test-dart.sh'
- 'dart-api-examples/**'

workflow_dispatch:
Expand Down Expand Up @@ -89,5 +91,6 @@ jobs:
run: |
cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
.github/scripts/test-dart.sh
1 change: 1 addition & 0 deletions dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
1 change: 1 addition & 0 deletions dart-api-examples/non-streaming-asr/bin/paraformer.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
1 change: 1 addition & 0 deletions dart-api-examples/non-streaming-asr/bin/whisper.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

Expand Down
3 changes: 3 additions & 0 deletions dart-api-examples/streaming-asr/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
3 changes: 3 additions & 0 deletions dart-api-examples/streaming-asr/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 1.0.0

- Initial version.
11 changes: 11 additions & 0 deletions dart-api-examples/streaming-asr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Introduction

This folder contains examples for streaming ASR with the Dart API.

| File | Description|
|------|------------|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
|[./bin/paraformer.dart](./bin/paraformer.dart)| Use a Paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
|[./bin/zipformer-ctc-hlg.dart](./bin/zipformer-ctc-hlg.dart)| Use a Zipformer CTC model with HLG graph for speech recognition. See [./run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|
|[./bin/zipformer-ctc.dart](./bin/zipformer-ctc.dart)| Use a Zipformer CTC model for speech recognition. See [./run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a Zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|

30 changes: 30 additions & 0 deletions dart-api-examples/streaming-asr/analysis_options.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
# rules:
# - camel_case_types

# analyzer:
# exclude:
# - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options
1 change: 1 addition & 0 deletions dart-api-examples/streaming-asr/bin/init.dart
1 change: 1 addition & 0 deletions dart-api-examples/streaming-asr/bin/nemo-transducer.dart
92 changes: 92 additions & 0 deletions dart-api-examples/streaming-asr/bin/paraformer.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright (c) 2024 Xiaomi Corporation

// Streaming (online) speech recognition with a Paraformer model.
//
// Reads a wave file, feeds it to the recognizer in small chunks to
// simulate real-time streaming, and prints a partial result whenever
// it changes.
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  // All four options are required; bail out with the usage text otherwise.
  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OnlineParaformerModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // Simulate streaming. You can choose an arbitrary chunk size;
  // a chunkSize of a single sample is also ok, i.e., chunkSize = 1.
  final chunkSize = 1600; // 0.1 second for 16kHz

  var last = '';
  // Feed the samples chunk by chunk. The final chunk may be shorter than
  // chunkSize; clamping `end` ensures the tail of the wave is not dropped
  // (the original `length ~/ chunkSize` loop discarded the remainder).
  final numSamples = waveData.samples.length;
  for (int start = 0; start < numSamples; start += chunkSize) {
    final end =
        (start + chunkSize < numSamples) ? start + chunkSize : numSamples;
    stream.acceptWaveform(
      samples: Float32List.sublistView(waveData.samples, start, end),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    // Print only when the (non-empty) partial result changes.
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // Add 0.5 seconds of tail padding (assume sampleRate is 16kHz) so the
  // model can flush its internal buffers and emit the final tokens.
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  // Free native resources; Dart GC does not manage them.
  stream.free();
  recognizer.free();
}
94 changes: 94 additions & 0 deletions dart-api-examples/streaming-asr/bin/zipformer-ctc-hlg.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright (c) 2024 Xiaomi Corporation

// Streaming (online) speech recognition with a Zipformer2 CTC model
// decoded with an HLG graph.
//
// Reads a wave file, feeds it to the recognizer in small chunks to
// simulate real-time streaming, and prints a partial result whenever
// it changes.
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('hlg', help: 'Path to HLG.fst')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  // All four options are required; bail out with the usage text otherwise.
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['hlg'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final hlg = res['hlg'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineZipformer2CtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    zipformer2Ctc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  // The HLG graph drives FST-based CTC decoding.
  final config = sherpa_onnx.OnlineRecognizerConfig(
    model: modelConfig,
    ctcFstDecoderConfig: sherpa_onnx.OnlineCtcFstDecoderConfig(graph: hlg),
  );
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // Simulate streaming. You can choose an arbitrary chunk size;
  // a chunkSize of a single sample is also ok, i.e., chunkSize = 1.
  final chunkSize = 1600; // 0.1 second for 16kHz

  var last = '';
  // Feed the samples chunk by chunk. The final chunk may be shorter than
  // chunkSize; clamping `end` ensures the tail of the wave is not dropped
  // (the original `length ~/ chunkSize` loop discarded the remainder).
  final numSamples = waveData.samples.length;
  for (int start = 0; start < numSamples; start += chunkSize) {
    final end =
        (start + chunkSize < numSamples) ? start + chunkSize : numSamples;
    stream.acceptWaveform(
      samples: Float32List.sublistView(waveData.samples, start, end),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    // Print only when the (non-empty) partial result changes.
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // Add 0.5 seconds of tail padding (assume sampleRate is 16kHz) so the
  // model can flush its internal buffers and emit the final tokens.
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  // Free native resources; Dart GC does not manage them.
  stream.free();
  recognizer.free();
}
Loading

0 comments on commit e307767

Please sign in to comment.