From c267218e497fc4b1d30aa4f4e7cae1d3c6745542 Mon Sep 17 00:00:00 2001 From: yqzhishen Date: Mon, 26 Aug 2024 23:19:29 +0800 Subject: [PATCH] Add checks for phoneme definitions in dsdict.yaml --- OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs | 9 ++++++++- OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 9 +++++++++ OpenUtau.Core/DiffSinger/DiffSingerVariance.cs | 9 +++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs index 0305bce39..d37fce7fd 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs @@ -153,12 +153,19 @@ protected bool IsSyllableVowelExtensionNote(Note note) { /// distribute phonemes to each note inside the group /// List ProcessWord(Note[] notes, string[] symbols){ + //Check if all phonemes are defined in dsdict.yaml (for their types) + foreach (var symbol in symbols) { + if (!g2p.IsValidSymbol(symbol)) { + throw new InvalidDataException( + $"Type definition of symbol \"{symbol}\" not found. Consider adding it to dsdict.yaml (or dsdict-.yaml) of the phonemizer."); + } + } var wordPhonemes = new List{ new phonemesPerNote(-1, notes[0].tone) }; var dsPhonemes = symbols .Select((symbol, index) => new dsPhoneme(symbol, GetSpeakerAtIndex(notes[0], index))) - .ToArray(); + .ToArray(); var isVowel = dsPhonemes.Select(s => g2p.IsVowel(s.Symbol)).ToArray(); var isGlide = dsPhonemes.Select(s => g2p.IsGlide(s.Symbol)).ToArray(); var nonExtensionNotes = notes.Where(n=>!IsSyllableVowelExtensionNote(n)).ToArray(); diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 2bf4f61d3..cddf90ff4 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -93,6 +93,15 @@ public RenderPitchResult Process(RenderPhrase phrase){ var endMs = phrase.notes[^1].endMs + tailMs; int headFrames = (int)Math.Round(headMs / frameMs); int tailFrames = (int)Math.Round(tailMs / frameMs); + if (dsConfig.predict_dur || dsConfig.use_note_rest) { + //Check if all phonemes are defined in dsdict.yaml (for their types) + foreach (var phone in phrase.phones) { + if (!g2p.IsValidSymbol(phone.phoneme)) { + throw new InvalidDataException( + $"Type definition of symbol \"{phone.phoneme}\" not found. Consider adding it to dsdict.yaml of the pitch predictor."); + } + } + } //Linguistic Encoder var linguisticInputs = new List(); var tokens = phrase.phones diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs index c0e97612e..b1aa80ebd 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs @@ -87,6 +87,15 @@ int PhonemeTokenize(string phoneme){ public VarianceResult Process(RenderPhrase phrase){ int headFrames = (int)Math.Round(headMs / frameMs); int tailFrames = (int)Math.Round(tailMs / frameMs); + if (dsConfig.predict_dur) { + //Check if all phonemes are defined in dsdict.yaml (for their types) + foreach (var phone in phrase.phones) { + if (!g2p.IsValidSymbol(phone.phoneme)) { + throw new InvalidDataException( + $"Type definition of symbol \"{phone.phoneme}\" not found. Consider adding it to dsdict.yaml of the variance predictor."); + } + } + } //Linguistic Encoder var linguisticInputs = new List(); var tokens = phrase.phones.Select(p => p.phoneme)