Skip to content

Commit

Permalink
Add C API for spoken language identification. (#695)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Mar 25, 2024
1 parent 0d258dd commit ab7cff2
Show file tree
Hide file tree
Showing 18 changed files with 363 additions and 67 deletions.
26 changes: 26 additions & 0 deletions .github/scripts/test-c-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -e

# Print a message prefixed with a timestamp and the caller's location.
# (This helper is borrowed from espnet.)
# Arguments: the message words, expanded as "$*"
# Outputs:   one line on stdout of the form
#            "<date> <time> (<file>:<line>:<function>) <message>";
#            backslash escapes are interpreted because of `echo -e`
log() {
  local caller_file caller_line caller_func origin
  # Index 1 of BASH_SOURCE/FUNCNAME refers to whoever called log().
  caller_file=${BASH_SOURCE[1]}
  caller_file=${caller_file##*/}   # keep only the file name, drop directories
  caller_line=${BASH_LINENO[0]}
  caller_func=${FUNCNAME[1]}
  origin="(${caller_file}:${caller_line}:${caller_func})"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') ${origin} $*"
}

echo "SLID_EXE is $SLID_EXE"
echo "PATH: $PATH"

# Fail loudly when the caller forgot to export SLID_EXE: an empty expansion
# would otherwise run nothing and let this test pass without testing anything.
: "${SLID_EXE:?SLID_EXE must name the executable to test}"

# Remove the downloaded model (directory and archive) from the current
# directory. Registered on EXIT so it also runs when a step below fails.
cleanup() {
  rm -rf sherpa-onnx-whisper-tiny*
}
trap cleanup EXIT

log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
log "------------------------------------------------------------"

# Start from a clean state in case a previous run left files behind.
cleanup
# -f makes curl fail on HTTP errors instead of saving the error page,
# which would only surface later as a confusing tar failure.
curl -fLS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

# The executable reads the model from ./sherpa-onnx-whisper-tiny; with
# `set -e` in effect a non-zero exit status fails the script.
"$SLID_EXE"
52 changes: 26 additions & 26 deletions .github/scripts/test-spoken-language-identification.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,32 @@ ar-arabic.wav
bg-bulgarian.wav
cs-czech.wav
da-danish.wav
de-german.wav
el-greek.wav
en-english.wav
es-spanish.wav
fa-persian.wav
fi-finnish.wav
fr-french.wav
hi-hindi.wav
hr-croatian.wav
id-indonesian.wav
it-italian.wav
ja-japanese.wav
ko-korean.wav
nl-dutch.wav
no-norwegian.wav
po-polish.wav
pt-portuguese.wav
ro-romanian.wav
ru-russian.wav
sk-slovak.wav
sv-swedish.wav
ta-tamil.wav
tl-tagalog.wav
tr-turkish.wav
uk-ukrainian.wav
zh-chinese.wav
# de-german.wav
# el-greek.wav
# en-english.wav
# es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# po-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
# ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
# zh-chinese.wav
)

for wav in ${waves[@]}; do
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/android.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-xcframework.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,15 @@ jobs:
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
path: build/bin/*

- name: Test spoken language identification
if: matrix.build_type != 'Debug'
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
.github/scripts/test-c-api.sh
- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down Expand Up @@ -243,6 +250,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test spoken language identification
if: matrix.build_type != 'Debug'
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
.github/scripts/test-c-api.sh
- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/windows-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test spoken language identification
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
.github/scripts/test-c-api.sh
- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
.github/scripts/test-c-api.sh
# - name: Test spoken language identification
# shell: bash
# run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@ log
vits-piper-*
vits-coqui-*
vits-mms-*
*.tar.bz2
5 changes: 4 additions & 1 deletion c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
add_executable(offline-tts-c-api offline-tts-c-api.c)
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)

if(SHERPA_ONNX_HAS_ALSA)
add_subdirectory(./asr-microphone-example)
else()
elseif((UNIX AND NOT APPLE) OR LINUX)
message(WARNING "Not include ./asr-microphone-example since alsa is not available")
endif()
2 changes: 1 addition & 1 deletion c-api-examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS := -L ../build/lib
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -framework Foundation
LDFLAGS += -lc++
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
Expand Down
63 changes: 32 additions & 31 deletions c-api-examples/decode-file-c-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) {
int32_t segment_id = 0;

const char *wav_filename = argv[context.index];
FILE *fp = fopen(wav_filename, "rb");
if (!fp) {
fprintf(stderr, "Failed to open %s\n", wav_filename);
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
if (wave == NULL) {
fprintf(stderr, "Failed to read %s\n", wav_filename);
return -1;
}

// Assume the wave header occupies 44 bytes.
fseek(fp, 44, SEEK_SET);

// simulate streaming

#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz

int16_t buffer[N];
float samples[N];
fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
wave->sample_rate, wave->num_samples,
(float)wave->num_samples / wave->sample_rate);

int32_t k = 0;
while (k < wave->num_samples) {
int32_t start = k;
int32_t end =
(start + N > wave->num_samples) ? wave->num_samples : (start + N);
k += N;

AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
end - start);
while (IsOnlineStreamReady(recognizer, stream)) {
DecodeOnlineStream(recognizer, stream);
}

while (!feof(fp)) {
size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);
if (n > 0) {
for (size_t i = 0; i != n; ++i) {
samples[i] = buffer[i] / 32768.;
}
AcceptWaveform(stream, 16000, samples, n);
while (IsOnlineStreamReady(recognizer, stream)) {
DecodeOnlineStream(recognizer, stream);
}
const SherpaOnnxOnlineRecognizerResult *r =
GetOnlineStreamResult(recognizer, stream);

const SherpaOnnxOnlineRecognizerResult *r =
GetOnlineStreamResult(recognizer, stream);
if (strlen(r->text)) {
SherpaOnnxPrint(display, segment_id, r->text);
}

if (IsEndpoint(recognizer, stream)) {
if (strlen(r->text)) {
SherpaOnnxPrint(display, segment_id, r->text);
++segment_id;
}

if (IsEndpoint(recognizer, stream)) {
if (strlen(r->text)) {
++segment_id;
}
Reset(recognizer, stream);
}

DestroyOnlineRecognizerResult(r);
Reset(recognizer, stream);
}

DestroyOnlineRecognizerResult(r);
}
fclose(fp);

// add some tail padding
float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
AcceptWaveform(stream, 16000, tail_paddings, 4800);
AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);

SherpaOnnxFreeWave(wave);

InputFinished(stream);
while (IsOnlineStreamReady(recognizer, stream)) {
Expand Down
65 changes: 65 additions & 0 deletions c-api-examples/spoken-language-identification-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example command to download the "tiny" whisper model is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Example: identify the spoken language of a fixed test wave file using the
// whisper "tiny" model through the sherpa-onnx C API.
//
// Returns 0 on success and -1 if the identifier cannot be created or the
// wave file cannot be read. The detected language is printed to stderr.
int32_t main() {
  SherpaOnnxSpokenLanguageIdentificationConfig config;

  // Zero the whole struct so that every field not assigned below is 0/NULL.
  memset(&config, 0, sizeof(config));

  config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
  config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
  config.num_threads = 1;
  config.debug = 1;
  config.provider = "cpu";

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&config);
  if (!slid) {
    fprintf(stderr, "Failed to create spoken language identifier\n");
    return -1;
  }

  // You can find more test waves from
  // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    // Release the identifier created above before bailing out.
    SherpaOnnxDestroySpokenLanguageIdentification(slid);
    return -1;
  }

  SherpaOnnxOfflineStream *stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);

  // Feed the entire wave to the stream in a single call.
  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                        wave->num_samples);

  const SherpaOnnxSpokenLanguageIdentificationResult *result =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  fprintf(stderr, "wav_filename: %s\n", wav_filename);
  fprintf(stderr, "Detected language: %s\n", result->lang);

  // Free everything obtained from the C API, most recent first.
  SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
  DestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroySpokenLanguageIdentification(slid);

  return 0;
}
2 changes: 1 addition & 1 deletion dotnet-examples/offline-decode-files/run-hotwords.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
Expand Down
2 changes: 1 addition & 1 deletion dotnet-examples/offline-decode-files/run-zipformer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
Expand Down
2 changes: 1 addition & 1 deletion dotnet-examples/online-decode-files/run-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

Expand Down
Loading

0 comments on commit ab7cff2

Please sign in to comment.