Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Jan 13, 2024
1 parent ed33030 commit 34d4166
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 25 deletions.
14 changes: 14 additions & 0 deletions .github/scripts/test-speaker-recognition-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,19 @@ done
ls -lh
popd

# Download the NeMo speaker-embedding models into $d/nemo.
# ($d and log are not defined in this section - presumably set earlier in
# this script; verify.)
log "Download NeMo models"
model_dir="$d/nemo"
mkdir -p "$model_dir"
pushd "$model_dir"
models=(
  nemo_en_titanet_large.onnx
  nemo_en_titanet_small.onnx
  nemo_en_speakerverification_speakernet.onnx
)
# Quote the array expansion so every entry is passed as exactly one word,
# with no word-splitting or glob expansion.
for m in "${models[@]}"; do
  # NOTE(review): the "recongition" spelling in the URL is kept verbatim;
  # presumably it matches the upstream release tag - confirm before "fixing" it.
  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$m"
done
# List what was downloaded so the CI log shows file names and sizes.
ls -lh
popd

python3 sherpa-onnx/python/tests/test_speaker_recognition.py --verbose
16 changes: 8 additions & 8 deletions cmake/kaldi-native-fbank.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
function(download_kaldi_native_fbank)
include(FetchContent)

set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.18.5.tar.gz")
set(kaldi_native_fbank_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.18.5.tar.gz")
set(kaldi_native_fbank_HASH "SHA256=dce0cb3bc6fece5d8053d8780cb4ce22da57cb57ebec332641661521a0425283")
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.18.6.tar.gz")
set(kaldi_native_fbank_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.18.6.tar.gz")
set(kaldi_native_fbank_HASH "SHA256=6202a00cd06ba8ff89beb7b6f85cda34e073e94f25fc29e37c519bff0706bf19")

set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(KALDI_NATIVE_FBANK_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
Expand All @@ -12,11 +12,11 @@ function(download_kaldi_native_fbank)
# If you don't have access to the Internet,
# please pre-download kaldi-native-fbank
set(possible_file_locations
$ENV{HOME}/Downloads/kaldi-native-fbank-1.18.5.tar.gz
${PROJECT_SOURCE_DIR}/kaldi-native-fbank-1.18.5.tar.gz
${PROJECT_BINARY_DIR}/kaldi-native-fbank-1.18.5.tar.gz
/tmp/kaldi-native-fbank-1.18.5.tar.gz
/star-fj/fangjun/download/github/kaldi-native-fbank-1.18.5.tar.gz
$ENV{HOME}/Downloads/kaldi-native-fbank-1.18.6.tar.gz
${PROJECT_SOURCE_DIR}/kaldi-native-fbank-1.18.6.tar.gz
${PROJECT_BINARY_DIR}/kaldi-native-fbank-1.18.6.tar.gz
/tmp/kaldi-native-fbank-1.18.6.tar.gz
/star-fj/fangjun/download/github/kaldi-native-fbank-1.18.6.tar.gz
)

foreach(f IN LISTS possible_file_locations)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_GENERAL_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_GENERAL_IMPL_H_
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor-impl.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-impl.cc
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"

#include "sherpa-onnx/csrc/macros.h"
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor-impl.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-impl.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_IMPL_H_
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor-model.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-model.cc
//
// Copyright (c) 2023-2024 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-extractor-model.h"

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor-model.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-model.h
//
// Copyright (c) 2023-2024 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_H_

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor-nemo-impl.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-impl.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_IMPL_H_
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor.cc
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-extractor.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-extractor.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-manager-test.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-manager-test.cc
//
// Copyright (c) 2023 Jingzhao Ou ([email protected])
// Copyright (c) 2024 Jingzhao Ou ([email protected])

#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-manager.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-manager.cc
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/speaker-embedding-manager.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// sherpa-onnx/csrc/speaker-embedding-manager.h
//
// Copyright (c) 2023 Xiaomi Corporation
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
Expand Down
33 changes: 28 additions & 5 deletions sherpa-onnx/python/tests/test_speaker_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def load_speaker_embedding_model(model_filename):
return extractor


def test_wespeaker_model(model_filename: str):
def test_zh_models(model_filename: str):
model_filename = str(model_filename)
if "en" in model_filename:
print(f"skip {model_filename}")
Expand Down Expand Up @@ -114,8 +114,9 @@ def test_wespeaker_model(model_filename: str):
assert ans == name, (name, ans)


def test_3dspeaker_model(model_filename: str):
extractor = load_speaker_embedding_model(str(model_filename))
def test_en_and_zh_models(model_filename: str):
model_filename = str(model_filename)
extractor = load_speaker_embedding_model(model_filename)
manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)

filenames = [
Expand All @@ -124,7 +125,14 @@ def test_3dspeaker_model(model_filename: str):
"speaker1_a_en_16k",
"speaker2_a_en_16k",
]
is_en = "en" in model_filename
for filename in filenames:
if is_en and "cn" in filename:
continue

if not is_en and "en" in filename:
continue

name = filename.rsplit("_", maxsplit=1)[0]
data, sample_rate = read_wave(
f"/tmp/sr-models/sr-data/test/3d-speaker/{filename}.wav"
Expand All @@ -145,6 +153,11 @@ def test_3dspeaker_model(model_filename: str):
"speaker1_b_en_16k",
]
for filename in filenames:
if is_en and "cn" in filename:
continue

if not is_en and "en" in filename:
continue
print(filename)
name = filename.rsplit("_", maxsplit=1)[0]
name = name.replace("b_cn", "a_cn")
Expand Down Expand Up @@ -178,7 +191,8 @@ def test_wespeaker_models(self):
return
for filename in model_dir.glob("*.onnx"):
print(filename)
test_wespeaker_model(filename)
test_zh_models(filename)
test_en_and_zh_models(filename)

def test_3dpeaker_models(self):
model_dir = Path(d) / "3dspeaker"
Expand All @@ -187,7 +201,16 @@ def test_3dpeaker_models(self):
return
for filename in model_dir.glob("*.onnx"):
print(filename)
test_3dspeaker_model(filename)
test_en_and_zh_models(filename)

def test_nemo_models(self):
    """Run test_en_and_zh_models on every ``*.onnx`` file under ``<d>/nemo``.

    If that directory does not exist, a skip message is printed and
    nothing is tested.
    """
    nemo_dir = Path(d) / "nemo"
    if nemo_dir.is_dir():
        for onnx_path in nemo_dir.glob("*.onnx"):
            # Echo the model path so the test log shows which model is being checked.
            print(onnx_path)
            test_en_and_zh_models(onnx_path)
    else:
        print(f"{nemo_dir} does not exist - skip it")


if __name__ == "__main__":
Expand Down

0 comments on commit 34d4166

Please sign in to comment.