Skip to content

Commit

Permalink
Revert "Replace TTS provider (#2278)"
Browse files Browse the repository at this point in the history
This reverts commit 1728443.
  • Loading branch information
Morb0 authored Jun 18, 2024
1 parent eddd8e6 commit 7d17965
Show file tree
Hide file tree
Showing 19 changed files with 4,857 additions and 582 deletions.
1 change: 1 addition & 0 deletions Content.Client/Credits/CreditsWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ void AddSection(string title, string path, bool markup = false)

AddSection(Loc.GetString("credits-window-contributors-section-title"), "GitHub.txt");
AddSection(Loc.GetString("credits-window-codebases-section-title"), "SpaceStation13.txt");
AddSection(Loc.GetString("credits-window-tts-title"), "TTS.txt"); // Corvax-TTS
AddSection(Loc.GetString("credits-window-original-remake-team-section-title"), "OriginalRemake.txt");
AddSection(Loc.GetString("credits-window-special-thanks-section-title"), "SpecialThanks.txt", true);

Expand Down
75 changes: 64 additions & 11 deletions Content.Server/Corvax/TTS/TTSManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using System.Web;
using Content.Shared.Corvax.CCCVars;
using Prometheus;
using Robust.Shared.Configuration;
Expand All @@ -19,9 +18,9 @@ public sealed class TTSManager
private static readonly Histogram RequestTimings = Metrics.CreateHistogram(
"tts_req_timings",
"Timings of TTS API requests",
new HistogramConfiguration
new HistogramConfiguration()
{
LabelNames = ["type"],
LabelNames = new[] {"type"},
Buckets = Histogram.ExponentialBuckets(.1, 1.5, 10),
});

Expand All @@ -42,6 +41,7 @@ public sealed class TTSManager
private readonly List<string> _cacheKeysSeq = new();
private int _maxCachedCount = 200;
private string _apiUrl = string.Empty;
private string _apiToken = string.Empty;

public void Initialize()
{
Expand All @@ -52,11 +52,7 @@ public void Initialize()
ResetCache();
}, true);
_cfg.OnValueChanged(CCCVars.TTSApiUrl, v => _apiUrl = v, true);
_cfg.OnValueChanged(CCCVars.TTSApiToken,
v =>
{
_httpClient.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", v);
}, true);
_cfg.OnValueChanged(CCCVars.TTSApiToken, v => _apiToken = v, true);
}

/// <summary>
Expand All @@ -78,12 +74,19 @@ public void Initialize()

_sawmill.Verbose($"Generate new audio for '{text}' speech by '{speaker}' speaker");

var body = new GenerateVoiceRequest
{
ApiToken = _apiToken,
Text = text,
Speaker = speaker,
};

var reqTime = DateTime.UtcNow;
try
{
var timeout = _cfg.GetCVar(CCCVars.TTSApiTimeout);
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(timeout));
var response = await _httpClient.GetAsync($"{_apiUrl}?speaker={speaker}&text={HttpUtility.UrlEncode(text)}&ext=ogg", cts.Token);
var response = await _httpClient.PostAsJsonAsync(_apiUrl, body, cts.Token);
if (!response.IsSuccessStatusCode)
{
if (response.StatusCode == HttpStatusCode.TooManyRequests)
Expand All @@ -96,7 +99,8 @@ public void Initialize()
return null;
}

var soundData = await response.Content.ReadAsByteArrayAsync(cancellationToken: cts.Token);
var json = await response.Content.ReadFromJsonAsync<GenerateVoiceResponse>(cancellationToken: cts.Token);
var soundData = Convert.FromBase64String(json.Results.First().Audio);

_cache.Add(cacheKey, soundData);
_cacheKeysSeq.Add(cacheKey);
Expand Down Expand Up @@ -135,9 +139,58 @@ public void ResetCache()
/// <summary>
/// Builds the cache key for a phrase: the SHA-256 digest of the UTF-8 bytes of
/// <paramref name="speaker"/>, a '/' separator and <paramref name="text"/>, rendered as hex.
/// </summary>
/// <param name="speaker">Speaker identifier the audio is requested for.</param>
/// <param name="text">Text that is sent for synthesis.</param>
/// <returns>Uppercase hexadecimal string of the 32-byte digest (64 characters).</returns>
private string GenerateCacheKey(string speaker, string text)
{
    var keyData = Encoding.UTF8.GetBytes($"{speaker}/{text}");

    // One-shot static hashing: no SHA256 instance is allocated per call, so nothing is left undisposed.
    var digest = System.Security.Cryptography.SHA256.HashData(keyData);

    return Convert.ToHexString(digest);
}

/// <summary>
/// JSON body that <c>ConvertTextToSpeech</c> posts to the TTS API.
/// Only <see cref="ApiToken"/>, <see cref="Text"/> and <see cref="Speaker"/> are settable from outside
/// the struct; every other option has a private setter and keeps the initializer value declared below.
/// </summary>
private struct GenerateVoiceRequest
{
// Explicit parameterless constructor: a struct that uses property initializers has to declare a
// constructor, and the initializers below only run when it is invoked (not for default(GenerateVoiceRequest)).
public GenerateVoiceRequest()
{
}

/// <summary>API auth token, sent inside the request body as "api_token".</summary>
[JsonPropertyName("api_token")]
public string ApiToken { get; set; } = "";

/// <summary>Text to synthesize; the caller in TTSSystem passes it already wrapped in SSML markup.</summary>
[JsonPropertyName("text")]
public string Text { get; set; } = "";

/// <summary>Identifier of the speaker the audio is requested for.</summary>
[JsonPropertyName("speaker")]
public string Speaker { get; set; } = "";

// NOTE(review): presumably tells the API that Text contains SSML — confirm against the API docs.
[JsonPropertyName("ssml")]
public bool SSML { get; private set; } = true;

// NOTE(review): looks like a "word timestamps" switch; always sent as false — confirm meaning.
[JsonPropertyName("word_ts")]
public bool WordTS { get; private set; } = false;

// NOTE(review): exact effect is defined by the remote API; always sent as true — confirm meaning.
[JsonPropertyName("put_accent")]
public bool PutAccent { get; private set; } = true;

// NOTE(review): exact effect is defined by the remote API; always sent as false — confirm meaning.
[JsonPropertyName("put_yo")]
public bool PutYo { get; private set; } = false;

// Fixed at 24000; presumably Hz — TODO confirm the unit with the API docs.
[JsonPropertyName("sample_rate")]
public int SampleRate { get; private set; } = 24000;

// Requested output format; fixed to "ogg".
[JsonPropertyName("format")]
public string Format { get; private set; } = "ogg";
}

/// <summary>
/// Shape of the JSON document that <c>ConvertTextToSpeech</c> reads from a successful TTS API response.
/// </summary>
private struct GenerateVoiceResponse
{
/// <summary>
/// Generated results; <c>ConvertTextToSpeech</c> only reads the first entry.
/// Nothing in this struct initializes the list, so it is null when the "results" field is absent.
/// </summary>
[JsonPropertyName("results")]
public List<VoiceResult> Results { get; set; }

// Mapped from "original_sha1"; not read by the code visible in this file.
// NOTE(review): presumably a SHA-1 of the submitted text — confirm against the API docs.
[JsonPropertyName("original_sha1")]
public string Hash { get; set; }
}

/// <summary>
/// One entry of <c>GenerateVoiceResponse.Results</c>.
/// </summary>
private struct VoiceResult
{
/// <summary>
/// Audio payload as a Base64 string; <c>ConvertTextToSpeech</c> decodes it with
/// <c>Convert.FromBase64String</c> to obtain the raw sound bytes.
/// </summary>
[JsonPropertyName("audio")]
public string Audio { get; set; }
}
}
23 changes: 23 additions & 0 deletions Content.Server/Corvax/TTS/TTSSystem.SSML.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
namespace Content.Server.Corvax.TTS;

// ReSharper disable once InconsistentNaming
public sealed partial class TTSSystem
{
    /// <summary>
    /// Wraps <paramref name="text"/> into an SSML <c>speak</c> document, adding one
    /// <c>prosody</c> element per requested trait.
    /// </summary>
    /// <param name="text">Text to embed; it is inserted as-is, without any escaping.</param>
    /// <param name="traits">Flags selecting which prosody wrappers are applied.</param>
    /// <returns>The SSML markup string.</returns>
    private string ToSsmlText(string text, SoundTraits traits = SoundTraits.None)
    {
        var inner = text;

        // Rate is wrapped first, so when both flags are set the pitch element ends up outermost.
        if ((traits & SoundTraits.RateFast) == SoundTraits.RateFast)
        {
            inner = "<prosody rate=\"fast\">" + inner + "</prosody>";
        }

        if ((traits & SoundTraits.PitchVerylow) == SoundTraits.PitchVerylow)
        {
            inner = "<prosody pitch=\"x-low\">" + inner + "</prosody>";
        }

        return "<speak>" + inner + "</speak>";
    }

    /// <summary>
    /// Bit flags describing how the generated SSML should alter the voice.
    /// </summary>
    [Flags]
    private enum SoundTraits : ushort
    {
        None = 0,
        RateFast = 1 << 0,
        PitchVerylow = 1 << 1,
    }
}
19 changes: 15 additions & 4 deletions Content.Server/Corvax/TTS/TTSSystem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,23 @@ private async void HandleWhisper(EntityUid uid, string message, string obfMessag
if (char.IsLetter(textSanitized[^1]))
textSanitized += ".";

return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized);
var ssmlTraits = SoundTraits.RateFast;
if (isWhisper)
ssmlTraits = SoundTraits.PitchVerylow;
var textSsml = ToSsmlText(textSanitized, ssmlTraits);

return await _ttsManager.ConvertTextToSpeech(speaker, textSsml);
}
}

/// <summary>
/// Event arguments that carry a sender entity together with a voice id.
/// Both values are exposed as public mutable fields.
/// </summary>
// NOTE(review): the name suggests subscribers may overwrite VoiceId to change the voice that is used —
// confirm against the code that raises and handles this event.
public sealed class TransformSpeakerVoiceEvent : EntityEventArgs
{
    /// <summary>Entity passed as the sender when the event is created.</summary>
    public EntityUid Sender;

    /// <summary>Voice id passed when the event is created.</summary>
    public string VoiceId;

    /// <summary>Creates the event for the given sender and voice id.</summary>
    /// <param name="sender">Value stored in <see cref="Sender"/>.</param>
    /// <param name="voiceId">Value stored in <see cref="VoiceId"/>.</param>
    public TransformSpeakerVoiceEvent(EntityUid sender, string voiceId)
    {
        Sender = sender;
        VoiceId = voiceId;
    }
}
2 changes: 1 addition & 1 deletion Content.Shared/Corvax/CCCVars/CCCVars.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public sealed class CCCVars
/// URL of the TTS server API.
/// </summary>
public static readonly CVarDef<string> TTSApiUrl =
CVarDef.Create("tts.api_url", "https://ntts.fdev.team/api/v1/tts", CVar.SERVERONLY | CVar.ARCHIVE);
CVarDef.Create("tts.api_url", "", CVar.SERVERONLY | CVar.ARCHIVE);

/// <summary>
/// Auth token of the TTS server API.
Expand Down
8 changes: 4 additions & 4 deletions Content.Shared/Humanoid/SharedHumanoidAppearanceSystem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ public abstract class SharedHumanoidAppearanceSystem : EntitySystem
[ValidatePrototypeId<SpeciesPrototype>]
public const string DefaultSpecies = "Human";
// Corvax-TTS-Start
public const string DefaultVoice = "johnny";
public const string DefaultVoice = "Garithos";
public static readonly Dictionary<Sex, string> DefaultSexVoice = new()
{
{Sex.Male, "johnny"},
{Sex.Female, "v_female"},
{Sex.Unsexed, "serana"},
{Sex.Male, "Garithos"},
{Sex.Female, "Maiev"},
{Sex.Unsexed, "Myron"},
};
// Corvax-TTS-End

Expand Down
1 change: 1 addition & 0 deletions Resources/Credits/TTS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Silero.AI for providing API of their TTS (Text-To-Speech)
Loading

0 comments on commit 7d17965

Please sign in to comment.