From c5bd6b70a2957e251b8a981d73e44fd46cb14579 Mon Sep 17 00:00:00 2001
From: Spatison <137375981+Spatison@users.noreply.github.com>
Date: Fri, 6 Dec 2024 01:47:06 +1000
Subject: [PATCH] [Fix] TTS (#137)
* TTS fix
* TTS fix
* TTS fix
* RedFoxIV review
---
Content.Server/_White/TTS/TTSManager.cs | 32 +++++++++++--------
Content.Server/_White/TTS/TTSSystem.cs | 29 +++--------------
.../_White/TTS/TTSPitchRateSystem.cs | 25 ++++++++-------
.../Prototypes/Entities/Mobs/Species/base.yml | 1 +
4 files changed, 38 insertions(+), 49 deletions(-)
diff --git a/Content.Server/_White/TTS/TTSManager.cs b/Content.Server/_White/TTS/TTSManager.cs
index 26b558ac05..e108f236e1 100644
--- a/Content.Server/_White/TTS/TTSManager.cs
+++ b/Content.Server/_White/TTS/TTSManager.cs
@@ -50,7 +50,7 @@ public void Initialize()
/// Identifier of speaker
/// SSML formatted text
/// OGG audio bytes
- public async Task ConvertTextToSpeech(string speaker, string text, string pitch, string rate, string? effect = null)
+ public async Task ConvertTextToSpeech(string speaker, string text)
{
var url = _cfg.GetCVar(WhiteCVars.TTSApiUrl);
if (string.IsNullOrWhiteSpace(url))
@@ -80,10 +80,7 @@ public void Initialize()
{
ApiToken = token,
Text = text,
- Speaker = speaker,
- Pitch = pitch,
- Rate = rate,
- Effect = effect
+ Speaker = speaker
};
var reqTime = DateTime.UtcNow;
@@ -143,19 +140,28 @@ private record GenerateVoiceRequest
public string ApiToken { get; set; } = "";
[JsonPropertyName("text")]
- public string Text { get; set; } = default!;
+ public string Text { get; set; } = "";
[JsonPropertyName("speaker")]
- public string Speaker { get; set; } = default!;
+ public string Speaker { get; set; } = "";
- [JsonPropertyName("pitch")]
- public string Pitch { get; set; } = default!;
+ [JsonPropertyName("ssml")]
+ public bool SSML { get; private set; } = true;
- [JsonPropertyName("rate")]
- public string Rate { get; set; } = default!;
+ [JsonPropertyName("word_ts")]
+ public bool WordTS { get; private set; } = false;
- [JsonPropertyName("effect")]
- public string? Effect { get; set; }
+ [JsonPropertyName("put_accent")]
+ public bool PutAccent { get; private set; } = true;
+
+ [JsonPropertyName("put_yo")]
+ public bool PutYo { get; private set; } = false;
+
+ [JsonPropertyName("sample_rate")]
+ public int SampleRate { get; private set; } = 24000;
+
+ [JsonPropertyName("format")]
+ public string Format { get; private set; } = "ogg";
}
private struct GenerateVoiceResponse
diff --git a/Content.Server/_White/TTS/TTSSystem.cs b/Content.Server/_White/TTS/TTSSystem.cs
index 611cbd6e74..3ba5930e8c 100644
--- a/Content.Server/_White/TTS/TTSSystem.cs
+++ b/Content.Server/_White/TTS/TTSSystem.cs
@@ -113,7 +113,7 @@ private async void OnAnnounceRequest(TtsAnnouncementEvent ev)
if (!_prototypeManager.TryIndex(ev.VoiceId, out var ttsPrototype))
return;
var message = FormattedMessage.RemoveMarkup(ev.Message);
- var soundData = await GenerateTTS(null, message, ttsPrototype.Speaker, speechRate: "slow", effect: "announce");
+ var soundData = await GenerateTTS(null, message, ttsPrototype.Speaker, speechRate: "slow");
if (soundData == null)
return;
Filter filter;
@@ -184,35 +184,14 @@ private async void OnRequestTTS(MsgRequestTTS ev)
RaiseNetworkEvent(new PlayTTSEvent(ev.Uid, soundData, false), Filter.SinglePlayer(session), false);
}
- private async Task<byte[]?> GenerateTTS(EntityUid? uid, string text, string speaker, string? speechPitch = null,
- string? speechRate = null, string? effect = null)
+ private async Task<byte[]?> GenerateTTS(EntityUid? uid, string text, string speaker, string? speechRate = null, string? speechPitch = null)
{
var textSanitized = Sanitize(text);
if (textSanitized == "")
return null;
- string pitch;
- string rate;
- if (speechPitch == null || speechRate == null)
- {
- if (uid == null || !_ttsPitchRateSystem.TryGetPitchRate(uid.Value, out var pitchRate))
- {
- pitch = "medium";
- rate = "medium";
- }
- else
- {
- pitch = pitchRate.Pitch;
- rate = pitchRate.Rate;
- }
- }
- else
- {
- pitch = speechPitch;
- rate = speechRate;
- }
-
- return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, pitch, rate, effect);
+ textSanitized = _ttsPitchRateSystem.GetFormattedSpeechText(uid, textSanitized, speechRate, speechPitch); // rate/pitch now travel inside the text instead of as separate request fields
+ return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized);
}
}
diff --git a/Content.Shared/_White/TTS/TTSPitchRateSystem.cs b/Content.Shared/_White/TTS/TTSPitchRateSystem.cs
index dfe22bb32c..6ce460e575 100644
--- a/Content.Shared/_White/TTS/TTSPitchRateSystem.cs
+++ b/Content.Shared/_White/TTS/TTSPitchRateSystem.cs
@@ -1,4 +1,3 @@
-using System.Diagnostics.CodeAnalysis;
using Content.Shared.Humanoid;
using Content.Shared.Humanoid.Prototypes;
using Robust.Shared.Prototypes;
@@ -18,21 +17,25 @@ public sealed class TTSPitchRateSystem : EntitySystem
["Reptilian"] = new TTSPitchRate("low", "slow"),
};
- public bool TryGetPitchRate(EntityUid uid, [NotNullWhen(true)] out TTSPitchRate? pitch)
+ public string GetFormattedSpeechText(EntityUid? uid, string text, string? speechRate = null, string? speechPitch = null)
{
- if (!TryComp<HumanoidAppearanceComponent>(uid, out var humanoid))
+ var ssml = text;
+ if (TryComp<HumanoidAppearanceComponent>(uid, out var humanoid))
{
- pitch = new TTSPitchRate();
- return false;
+ var species = SpeciesPitches.GetValueOrDefault(humanoid.Species);
+ if (species != null)
+ {
+ speechRate ??= species.Rate; // an explicit caller value wins; the species entry only fills a null
+ speechPitch ??= species.Pitch;
+ }
}
- pitch = GetPitchRate(humanoid.Species);
- return pitch != null;
- }
+ if (speechRate != null)
+ ssml = $"<prosody rate=\"{speechRate}\">{ssml}</prosody>";
+ if (speechPitch != null)
+ ssml = $"<prosody pitch=\"{speechPitch}\">{ssml}</prosody>";
- public TTSPitchRate? GetPitchRate(ProtoId<SpeciesPrototype> protoId)
- {
- return SpeciesPitches.GetValueOrDefault(protoId);
+ return $"<speak>{ssml}</speak>"; // always wrapped in the SSML root element, even when no prosody applies
}
}
diff --git a/Resources/Prototypes/Entities/Mobs/Species/base.yml b/Resources/Prototypes/Entities/Mobs/Species/base.yml
index 24ee2a964a..ff8a16a467 100644
--- a/Resources/Prototypes/Entities/Mobs/Species/base.yml
+++ b/Resources/Prototypes/Entities/Mobs/Species/base.yml
@@ -230,6 +230,7 @@
- DoorBumpOpener
- type: Targeting
- type: SurgeryTarget
+ - type: TTS
- type: entity
save: false