From 6b2f550341a9c9a527156e355d19c4ee8612ab0d Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 1 Dec 2024 21:20:05 +0800 Subject: [PATCH] Add ArkTS for text-to-speech --- .../sherpa_onnx/BuildProfile.ets | 4 +- .../src/main/cpp/non-streaming-tts.cc | 20 ++++++ .../main/cpp/types/libsherpa_onnx/Index.d.ts | 5 ++ .../main/ets/components/NonStreamingAsr.ets | 2 +- .../main/ets/components/NonStreamingTts.ets | 66 +++++++++++++++++++ .../src/main/ets/components/StreamingAsr.ets | 4 +- 6 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets index 502c150fd..c6564edc7 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets @@ -2,8 +2,8 @@ * Use these variables when you tailor your ArkTS code. They must be of the const type. */ export const HAR_VERSION = '1.10.32'; -export const BUILD_MODE_NAME = 'release'; -export const DEBUG = false; +export const BUILD_MODE_NAME = 'debug'; +export const DEBUG = true; export const TARGET_NAME = 'default'; /** diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc index 70d97cddb..da70e662c 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc @@ -63,6 +63,17 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); +#if __OHOS__ + // the last argument is the NativeResourceManager + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments.
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } +#else if (info.Length() != 1) { std::ostringstream os; os << "Expect only 1 argument. Given: " << info.Length(); @@ -71,6 +82,7 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( return {}; } +#endif if (!info[0].IsObject()) { Napi::TypeError::New(env, "Expect an object as the argument") @@ -90,7 +102,15 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( SHERPA_ONNX_ASSIGN_ATTR_INT32(max_num_sentences, maxNumSentences); SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); +#if __OHOS__ + std::unique_ptr<NativeResourceManager, + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), + &OH_ResourceManager_ReleaseNativeResourceManager); + SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTtsOHOS(&c, mgr.get()); +#else SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&c); +#endif if (c.model.vits.model) { delete[] c.model.vits.model; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts index 10ff7745c..f44ade356 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts @@ -33,3 +33,8 @@ export const decodeOnlineStream: (handle: object, streamHandle: object) => void; export const isEndpoint: (handle: object, streamHandle: object) => boolean; export const reset: (handle: object, streamHandle: object) => void; export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) => string; + +export const createOfflineTts: (config: object, mgr?: object) => object; +export const getOfflineTtsNumSpeakers: (handle: object) => number; +export const getOfflineTtsSampleRate: (handle: object) => number; +export const offlineTtsGenerate: (handle: object, input: object) => object; diff --git
a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets index 0cc8466a9..d3f849cca 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets @@ -79,7 +79,7 @@ export class OfflineModelConfig { public tokens: string = ''; public numThreads: number = 1; public debug: boolean = false; - public provider: string = "cpu"; + public provider: string = 'cpu'; public modelType: string = ''; public modelingUnit: string = "cjkchar"; public bpeVocab: string = ''; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets new file mode 100644 index 000000000..c568b9990 --- /dev/null +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets @@ -0,0 +1,66 @@ +import { + createOfflineTts, + getOfflineTtsNumSpeakers, + getOfflineTtsSampleRate, + offlineTtsGenerate, +} from "libsherpa_onnx.so"; + +export class OfflineTtsVitsModelConfig { + public model: string = ''; + public lexicon: string = ''; + public tokens: string = ''; + public dataDir: string = ''; + public dictDir: string = ''; + public noiseScale: number = 0.667; + public noiseScaleW: number = 0.8; + public lengthScale: number = 1.0; +} + +export class OfflineTtsModelConfig { + public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig(); + public numThreads: number = 1; + public debug: boolean = false; + public provider: string = 'cpu'; +} + +export class OfflineTtsConfig { + public model: OfflineTtsModelConfig = new OfflineTtsModelConfig(); + public ruleFsts: string = ''; + public ruleFars: string = ''; + public maxNumSentences: number = 1; +} + +export class TtsOutput { + public samples: Float32Array = new Float32Array(0); + public
sampleRate: number = 0; +} + +export class TtsInput { + public text: string = ''; + public sid: number = 0; + public speed: number = 1.0; +} + +export class OfflineTts { + private handle: object; + public config: OfflineTtsConfig; + public numSpeakers: number; + public sampleRate: number; + constructor(config: OfflineTtsConfig, mgr?: object) { + this.handle = createOfflineTts(config, mgr); + this.config = config; + + this.numSpeakers = getOfflineTtsNumSpeakers(this.handle); + this.sampleRate = getOfflineTtsSampleRate(this.handle); + } + + /* + input obj: {text: "xxxx", sid: 0, speed: 1.0} + where text is a string, sid is an int32, speed is a float + + return an object {samples: Float32Array, sampleRate: number} + */ + generate(input: TtsInput): TtsOutput { + return offlineTtsGenerate(this.handle, input) as TtsOutput; + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets index 7ecc552ca..3b2985771 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets @@ -52,7 +52,7 @@ export class OnlineModelConfig { public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); public tokens: string = ''; public numThreads: number = 1; - public provider: string = "cpu"; + public provider: string = 'cpu'; public debug: boolean = false; public modelType: string = ''; public modelingUnit: string = "cjkchar"; @@ -67,7 +67,7 @@ export class OnlineCtcFstDecoderConfig { export class OnlineRecognizerConfig { public featConfig: FeatureConfig = new FeatureConfig(); public modelConfig: OnlineModelConfig = new OnlineModelConfig(); - public decodingMethod: string = "greedy_search"; + public decodingMethod: string = 'greedy_search'; public maxActivePaths: number = 4; public enableEndpoint: boolean =
false; public rule1MinTrailingSilence: number = 2.4;