From c5bd6b70a2957e251b8a981d73e44fd46cb14579 Mon Sep 17 00:00:00 2001 From: Spatison <137375981+Spatison@users.noreply.github.com> Date: Fri, 6 Dec 2024 01:47:06 +1000 Subject: [PATCH] [Fix] TTS (#137) * TTS fix * TTS fix * TTS fix * RedFoxIV review --- Content.Server/_White/TTS/TTSManager.cs | 32 +++++++++++-------- Content.Server/_White/TTS/TTSSystem.cs | 29 +++-------------- .../_White/TTS/TTSPitchRateSystem.cs | 25 ++++++++------- .../Prototypes/Entities/Mobs/Species/base.yml | 1 + 4 files changed, 38 insertions(+), 49 deletions(-) diff --git a/Content.Server/_White/TTS/TTSManager.cs b/Content.Server/_White/TTS/TTSManager.cs index 26b558ac05..e108f236e1 100644 --- a/Content.Server/_White/TTS/TTSManager.cs +++ b/Content.Server/_White/TTS/TTSManager.cs @@ -50,7 +50,7 @@ public void Initialize() /// Identifier of speaker /// SSML formatted text /// OGG audio bytes - public async Task ConvertTextToSpeech(string speaker, string text, string pitch, string rate, string? effect = null) + public async Task ConvertTextToSpeech(string speaker, string text) { var url = _cfg.GetCVar(WhiteCVars.TTSApiUrl); if (string.IsNullOrWhiteSpace(url)) @@ -80,10 +80,7 @@ public void Initialize() { ApiToken = token, Text = text, - Speaker = speaker, - Pitch = pitch, - Rate = rate, - Effect = effect + Speaker = speaker }; var reqTime = DateTime.UtcNow; @@ -143,19 +140,28 @@ private record GenerateVoiceRequest public string ApiToken { get; set; } = ""; [JsonPropertyName("text")] - public string Text { get; set; } = default!; + public string Text { get; set; } = ""; [JsonPropertyName("speaker")] - public string Speaker { get; set; } = default!; + public string Speaker { get; set; } = ""; - [JsonPropertyName("pitch")] - public string Pitch { get; set; } = default!; + [JsonPropertyName("ssml")] + public bool SSML { get; private set; } = true; - [JsonPropertyName("rate")] - public string Rate { get; set; } = default!; + [JsonPropertyName("word_ts")] + public bool WordTS { get; private set; } 
= false; - [JsonPropertyName("effect")] - public string? Effect { get; set; } + [JsonPropertyName("put_accent")] + public bool PutAccent { get; private set; } = true; + + [JsonPropertyName("put_yo")] + public bool PutYo { get; private set; } = false; + + [JsonPropertyName("sample_rate")] + public int SampleRate { get; private set; } = 24000; + + [JsonPropertyName("format")] + public string Format { get; private set; } = "ogg"; } private struct GenerateVoiceResponse diff --git a/Content.Server/_White/TTS/TTSSystem.cs b/Content.Server/_White/TTS/TTSSystem.cs index 611cbd6e74..3ba5930e8c 100644 --- a/Content.Server/_White/TTS/TTSSystem.cs +++ b/Content.Server/_White/TTS/TTSSystem.cs @@ -113,7 +113,7 @@ private async void OnAnnounceRequest(TtsAnnouncementEvent ev) if (!_prototypeManager.TryIndex(ev.VoiceId, out var ttsPrototype)) return; var message = FormattedMessage.RemoveMarkup(ev.Message); - var soundData = await GenerateTTS(null, message, ttsPrototype.Speaker, speechRate: "slow", effect: "announce"); + var soundData = await GenerateTTS(null, message, ttsPrototype.Speaker, speechRate: "slow"); if (soundData == null) return; Filter filter; @@ -184,35 +184,14 @@ private async void OnRequestTTS(MsgRequestTTS ev) RaiseNetworkEvent(new PlayTTSEvent(ev.Uid, soundData, false), Filter.SinglePlayer(session), false); } - private async Task GenerateTTS(EntityUid? uid, string text, string speaker, string? speechPitch = null, - string? speechRate = null, string? effect = null) + private async Task GenerateTTS(EntityUid? uid, string text, string speaker, string? speechRate = null, string? 
speechPitch = null) { var textSanitized = Sanitize(text); if (textSanitized == "") return null; - string pitch; - string rate; - if (speechPitch == null || speechRate == null) - { - if (uid == null || !_ttsPitchRateSystem.TryGetPitchRate(uid.Value, out var pitchRate)) - { - pitch = "medium"; - rate = "medium"; - } - else - { - pitch = pitchRate.Pitch; - rate = pitchRate.Rate; - } - } - else - { - pitch = speechPitch; - rate = speechRate; - } - - return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, pitch, rate, effect); + textSanitized = _ttsPitchRateSystem.GetFormattedSpeechText(uid, textSanitized, speechRate, speechPitch); + return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized); } } diff --git a/Content.Shared/_White/TTS/TTSPitchRateSystem.cs b/Content.Shared/_White/TTS/TTSPitchRateSystem.cs index dfe22bb32c..6ce460e575 100644 --- a/Content.Shared/_White/TTS/TTSPitchRateSystem.cs +++ b/Content.Shared/_White/TTS/TTSPitchRateSystem.cs @@ -1,4 +1,3 @@ -using System.Diagnostics.CodeAnalysis; using Content.Shared.Humanoid; using Content.Shared.Humanoid.Prototypes; using Robust.Shared.Prototypes; @@ -18,21 +17,25 @@ public sealed class TTSPitchRateSystem : EntitySystem ["Reptilian"] = new TTSPitchRate("low", "slow"), }; - public bool TryGetPitchRate(EntityUid uid, [NotNullWhen(true)] out TTSPitchRate? pitch) + public string GetFormattedSpeechText(EntityUid? uid, string text, string? speechRate = null, string? speechPitch = null) { - if (!TryComp(uid, out var humanoid)) + var ssml = text; + if (TryComp(uid, out var humanoid)) { - pitch = new TTSPitchRate(); - return false; + var species = SpeciesPitches.GetValueOrDefault(humanoid.Species); + if (species != null) + { + speechRate ??= species.Rate; + speechPitch ??= species.Pitch; + } } - pitch = GetPitchRate(humanoid.Species); - return pitch != null; - } + if (speechRate != null) + ssml = $"<prosody rate=\"{speechRate}\">{ssml}</prosody>"; + if (speechPitch != null) + ssml = $"<prosody pitch=\"{speechPitch}\">{ssml}</prosody>"; - public TTSPitchRate? 
GetPitchRate(ProtoId protoId) - { - return SpeciesPitches.GetValueOrDefault(protoId); + return $"<speak>{ssml}</speak>"; } } diff --git a/Resources/Prototypes/Entities/Mobs/Species/base.yml b/Resources/Prototypes/Entities/Mobs/Species/base.yml index 24ee2a964a..ff8a16a467 100644 --- a/Resources/Prototypes/Entities/Mobs/Species/base.yml +++ b/Resources/Prototypes/Entities/Mobs/Species/base.yml @@ -230,6 +230,7 @@ - DoorBumpOpener - type: Targeting - type: SurgeryTarget + - type: TTS - type: entity save: false