-
Notifications
You must be signed in to change notification settings - Fork 475
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add speaker identification and verification example for Dart API (#1194)
- Loading branch information
1 parent
963aaba
commit ec98110
Showing
13 changed files
with
270 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# https://dart.dev/guides/libraries/private-files | ||
# Created by `dart pub` | ||
.dart_tool/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Introduction | ||
|
||
This example shows how to use the Dart API from sherpa-onnx for speaker identification. | ||
|
||
| File | Description| | ||
|------|------------| | ||
|[./bin/speaker_id.dart](./bin/speaker_id.dart)| Use a speaker embedding extractor model for speaker identification and verification. See also [./run-3d-speaker.sh](./run-3d-speaker.sh)| |
30 changes: 30 additions & 0 deletions
30
dart-api-examples/speaker-identification/analysis_options.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# This file configures the static analysis results for your project (errors, | ||
# warnings, and lints). | ||
# | ||
# This enables the 'recommended' set of lints from `package:lints`. | ||
# This set helps identify many issues that may lead to problems when running | ||
# or consuming Dart code, and enforces writing Dart using a single, idiomatic | ||
# style and format. | ||
# | ||
# If you want a smaller set of lints you can change this to specify | ||
# 'package:lints/core.yaml'. These are just the most critical lints | ||
# (the recommended set includes the core lints). | ||
# The core lints are also what is used by pub.dev for scoring packages. | ||
|
||
include: package:lints/recommended.yaml | ||
|
||
# Uncomment the following section to specify additional rules. | ||
|
||
# linter: | ||
# rules: | ||
# - camel_case_types | ||
|
||
# analyzer: | ||
# exclude: | ||
# - path/to/excluded/files/** | ||
|
||
# For more information about the core and recommended set of lints, see | ||
# https://dart.dev/go/core-lints | ||
|
||
# For additional information about configuring this file, see | ||
# https://dart.dev/guides/language/analysis-options |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../vad/bin/init.dart |
160 changes: 160 additions & 0 deletions
160
dart-api-examples/speaker-identification/bin/speaker_id.dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// Copyright (c) 2024 Xiaomi Corporation | ||
import 'dart:io'; | ||
import 'dart:typed_data'; | ||
|
||
import 'package:args/args.dart'; | ||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
import './init.dart'; | ||
|
||
/// Computes a speaker embedding for the wave file at [filename].
///
/// Reads the file with `sherpa_onnx.readWave`, feeds all of its samples into
/// a fresh stream created by [extractor], marks the input as finished, and
/// returns the result of `extractor.compute` for that stream.
///
/// The stream is freed before this function returns, including when any of
/// the stream or extractor calls throws.
Float32List computeEmbedding(
    {required sherpa_onnx.SpeakerEmbeddingExtractor extractor,
    required String filename}) {
  final waveData = sherpa_onnx.readWave(filename);
  final stream = extractor.createStream();

  try {
    stream.acceptWaveform(
      samples: waveData.samples,
      sampleRate: waveData.sampleRate,
    );

    // Signal that no more audio will be provided for this stream.
    stream.inputFinished();

    return extractor.compute(stream);
  } finally {
    // The stream must be released explicitly (presumably it wraps a native
    // handle); doing it in `finally` avoids leaking it on the error path.
    stream.free();
  }
}
|
||
/// Entry point of the speaker identification and verification example.
///
/// Expects `--model <path>` in [arguments]; when it is missing, prints the
/// usage text and exits with status 1.
///
/// The example enrolls two speakers ("fangjun" and "leijun") from wave files
/// under `./sr-data/enroll`, searches the manager for each file under
/// `./sr-data/test`, and then exercises `verify` and `remove`. Each failed
/// check prints a message and returns early.
///
/// The extractor and the manager are freed on every exit path once they have
/// been created.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()..addOption('model', help: 'Path to model.onnx');

  final res = parser.parse(arguments);
  if (res['model'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  /*
     Please download test data by yourself
     curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
     tar xvf sr-data.tar.gz
     mv sr-data-1.0.0 sr-data
   */

  final config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
    model: model,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );
  final extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config: config);

  final manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

  try {
    final spk1Files = [
      "./sr-data/enroll/fangjun-sr-1.wav",
      "./sr-data/enroll/fangjun-sr-2.wav",
      "./sr-data/enroll/fangjun-sr-3.wav",
    ];

    final spk1Vec = <Float32List>[];
    for (final f in spk1Files) {
      final embedding = computeEmbedding(extractor: extractor, filename: f);
      spk1Vec.add(embedding);
    }

    final spk2Files = [
      "./sr-data/enroll/leijun-sr-1.wav",
      "./sr-data/enroll/leijun-sr-2.wav",
    ];

    final spk2Vec = <Float32List>[];
    for (final f in spk2Files) {
      final embedding = computeEmbedding(extractor: extractor, filename: f);
      spk2Vec.add(embedding);
    }

    // Register each speaker with all of their enrollment embeddings.
    if (!manager.addMulti(name: "fangjun", embeddingList: spk1Vec)) {
      print("Failed to register fangjun");
      return;
    }

    if (!manager.addMulti(name: "leijun", embeddingList: spk2Vec)) {
      print("Failed to register leijun");
      return;
    }

    if (manager.numSpeakers != 2) {
      print("There should be two speakers");
      return;
    }

    if (!manager.contains("fangjun")) {
      print("It should contain the speaker fangjun");
      return;
    }

    if (!manager.contains("leijun")) {
      print("It should contain the speaker leijun");
      return;
    }

    print("---All speakers---");
    final allSpeakers = manager.allSpeakerNames;
    for (final s in allSpeakers) {
      print(s);
    }
    print("------------");

    final testFiles = [
      "./sr-data/test/fangjun-test-sr-1.wav",
      "./sr-data/test/leijun-test-sr-1.wav",
      "./sr-data/test/liudehua-test-sr-1.wav",
    ];

    final threshold = 0.6;
    for (final file in testFiles) {
      final embedding = computeEmbedding(extractor: extractor, filename: file);

      // An empty name is reported as an unknown speaker.
      var name = manager.search(embedding: embedding, threshold: threshold);
      if (name == '') {
        name = "<Unknown>";
      }
      print("$file: $name");
    }

    if (!manager.verify(
        name: "fangjun",
        embedding:
            computeEmbedding(extractor: extractor, filename: testFiles[0]),
        threshold: threshold)) {
      // `${...}` is required to interpolate an index expression; `$testFiles`
      // alone would print the whole list followed by a literal "[0]".
      print("${testFiles[0]} should match fangjun!");
      return;
    }

    if (!manager.remove("fangjun")) {
      print("Failed to remove fangjun");
      return;
    }

    // After removal, the same file must no longer verify as "fangjun".
    if (manager.verify(
        name: "fangjun",
        embedding:
            computeEmbedding(extractor: extractor, filename: testFiles[0]),
        threshold: threshold)) {
      print("${testFiles[0]} should match no one!");
      return;
    }

    if (manager.numSpeakers != 1) {
      print("There should only 1 speaker left.");
      return;
    }
  } finally {
    // Free on every exit path (early returns and exceptions included) to
    // avoid leaking the extractor and the manager.
    extractor.free();
    manager.free();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
name: speaker_identification | ||
|
||
description: > | ||
This example demonstrates how to use the Dart API for speaker identification. | ||
version: 1.0.0 | ||
|
||
environment: | ||
sdk: ^3.4.0 | ||
|
||
dependencies: | ||
sherpa_onnx: ^1.10.20 | ||
path: ^1.9.0 | ||
args: ^2.5.0 | ||
|
||
dev_dependencies: | ||
lints: ^3.0.0 |
19 changes: 19 additions & 0 deletions
19
dart-api-examples/speaker-identification/run-3d-speaker.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/usr/bin/env bash
#
# Runs the speaker identification example.
#
# Fetches Dart dependencies, downloads the speaker embedding model and the
# test wave files if they are not already present, then runs
# ./bin/speaker_id.dart with the downloaded model.

set -ex

dart pub get

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -f "./$model" ]; then
  # --fail makes curl exit non-zero on HTTP errors instead of saving the error
  # page as the model. Downloading to a temporary name and renaming afterwards
  # keeps an interrupted download from satisfying the -f check on the next run.
  curl --fail -SL -o "./$model.tmp" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.tmp" "./$model"
fi

if [ ! -f ./sr-data/enroll/leijun-sr-1.wav ]; then
  curl --fail -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
fi

dart run \
  ./bin/speaker_id.dart \
  --model "./$model"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
name: speaker_identification | ||
|
||
description: > | ||
This example demonstrates how to use the Dart API for speaker identification. | ||
version: 1.0.0 | ||
|
||
environment: | ||
sdk: ^3.4.0 | ||
|
||
dependencies: | ||
sherpa_onnx: | ||
path: ../../flutter/sherpa_onnx | ||
path: ^1.9.0 | ||
args: ^2.5.0 | ||
|
||
dev_dependencies: | ||
lints: ^3.0.0 |