using System;
using System.Collections.Generic;
using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core.G2p;

namespace OpenUtau.Core.DiffSinger {
    /// <summary>
    /// DiffSinger phonemizer for Japanese lyrics. It reads the
    /// <c>dsdict-ja.yaml</c> dictionary and uses <see cref="JapaneseMonophoneG2p"/>
    /// as its base grapheme-to-phoneme converter.
    /// </summary>
    [Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")]
    public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer {
        /// <summary>File name of the dictionary this phonemizer asks for.</summary>
        protected override string GetDictionaryName() => "dsdict-ja.yaml";

        /// <summary>Creates the base G2p converter used by this phonemizer.</summary>
        protected override IG2p LoadBaseG2p() => new JapaneseMonophoneG2p();

        /// <summary>Symbols of the base G2p that are reported as vowels.</summary>
        protected override string[] GetBaseG2pVowels() => new string[] {
            "A", "AP", "E", "I", "N", "O", "SP", "U",
            "a", "e", "i", "o", "u"
        };

        /// <summary>Symbols of the base G2p that are reported as consonants.</summary>
        protected override string[] GetBaseG2pConsonants() => new string[] {
            "b", "by", "ch", "cl", "d", "dy", "f", "g", "gw", "gy", "h", "hy",
            "j", "k", "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py",
            "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z"
        };

        /// <summary>
        /// Produces the phonemes for a note group. A group whose first lyric is
        /// <c>"-"</c> yields a single <c>"SP"</c> phoneme; any other group is
        /// looked up in <c>partResult</c> by the position of its first note.
        /// </summary>
        /// <param name="notes">The note group; only the first note is inspected.</param>
        /// <returns>The phonemes and positions stored for the first note.</returns>
        /// <exception cref="InvalidOperationException">
        /// No entry exists in <c>partResult</c> for the first note's position.
        /// </exception>
        public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
            var head = notes[0];
            if (head.lyric == "-") {
                return MakeSimpleResult("SP");
            }
            // partResult is inherited; presumably it is filled before Process runs —
            // TODO confirm against the base phonemizer.
            if (!partResult.TryGetValue(head.position, out var phonemes)) {
                // A specific exception type instead of the bare Exception base class;
                // handlers that catch Exception still catch this.
                throw new InvalidOperationException("Part result not found");
            }
            return new Result {
                phonemes = phonemes
                    .Select(tu => new Phoneme {
                        phoneme = tu.Item1,
                        position = tu.Item2,
                    })
                    .ToArray(),
            };
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using OpenUtau.Api;
using OpenUtau.Core.Util;

namespace OpenUtau.Core.G2p {
    /// <summary>
    /// Japanese grapheme-to-phoneme pack whose dictionary is assembled from the
    /// text files inside the embedded <c>g2p_ja_mono</c> resource. The loaded
    /// state is static and shared by every instance.
    /// </summary>
    public class JapaneseMonophoneG2p : G2pPack {
        // The first four entries of both symbol tables are empty placeholders;
        // they are skipped when the grapheme index is built.
        private const int PlaceholderCount = 4;

        // NOTE(review): the separator appears as a single space in the reviewed
        // text, whose whitespace was collapsed. With one space, any entry that
        // maps to more than one phoneme is dropped by the two-part filter —
        // confirm against the original file.
        private static readonly string[] entrySeparator = new string[] { " " };

        // Dictionary files, in the order they are read and added.
        private static readonly string[] entryFiles = new string[] {
            "hiragana.txt", "katakana.txt", "romaji.txt", "special.txt"
        };

        private static readonly string[] graphemes = new string[] {
            "", "", "", "", "a", "b", "c", "d", "e", "f", "g",
            "h", "i", "j", "k", "m", "n", "o", "p", "r", "s",
            "t", "u", "v", "w", "y", "z", "あ", "い", "う", "え",
            "お", "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "か", "き", "く",
            "け", "こ", "さ", "し", "す", "せ", "そ", "ざ", "じ", "ず",
            "ぜ", "ぞ", "た", "ち", "つ", "て", "と", "だ", "ぢ", "づ", "で",
            "ど", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ",
            "ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま",
            "み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら",
            "り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ", "゜",
            "ゐ", "ゑ", "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ",
            "ォ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ",
            "ザ", "ジ", "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ",
            "ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ",
            "フ", "ヘ", "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ",
            "ペ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ",
            "ュ", "ョ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ",
            "ヰ", "ヱ", "息", "吸", "-", "R"
        };

        private static readonly string[] phonemes = new string[] {
            "", "", "", "", "A", "AP", "E", "I", "N", "O", "U",
            "SP", "a", "b", "ch", "cl", "d", "dy", "e", "f", "g", "gw",
            "gy", "h", "hy", "i", "j", "k", "kw", "ky", "m", "my", "n",
            "ng", "ny", "o", "p", "py", "r", "ry", "s", "sh", "t", "ts",
            "ty", "u", "v", "w", "y", "z"
        };

        private static readonly object lockObj = new object();
        // Also serves as the "static state is loaded" marker, so it is set last.
        private static Dictionary<string, int> graphemeIndexes;
        // One dictionary holds the entries of all four text files.
        private static IG2p dict;
        private static InferenceSession session;
        // NOTE(review): the type arguments were stripped from the reviewed text;
        // <string, string[]> is assumed to match G2pPack.PredCache — confirm.
        private static Dictionary<string, string[]> predCache = new Dictionary<string, string[]>();

        /// <summary>
        /// Builds the dictionary from the text files in a zipped pack.
        /// <c>phones.txt</c> supplies the symbols; <c>hiragana.txt</c>,
        /// <c>katakana.txt</c>, <c>romaji.txt</c> and <c>special.txt</c> supply
        /// the entries. Lines starting with <c>;;;</c> are skipped, as are lines
        /// that do not split into exactly two parts.
        /// </summary>
        /// <param name="data">Raw bytes of the zip archive.</param>
        /// <param name="prepGrapheme">Optional transform applied to each grapheme; identity when null.</param>
        /// <param name="prepPhoneme">Optional transform applied to each phoneme symbol; identity when null.</param>
        /// <returns>
        /// The built dictionary paired with the current value of the static
        /// session field.
        /// </returns>
        protected Tuple<IG2p, InferenceSession> LoadPack(
            byte[] data,
            Func<string, string> prepGrapheme = null,
            Func<string, string> prepPhoneme = null) {
            prepGrapheme = prepGrapheme ?? ((string s) => s);
            prepPhoneme = prepPhoneme ?? ((string s) => s);

            // Extract every file up front, in the original order, before any
            // dictionary work starts.
            string[][] entryTexts = entryFiles
                .Select(name => Zip.ExtractText(data, name))
                .ToArray();
            string[] phonesTxt = Zip.ExtractText(data, "phones.txt");

            var builder = G2pDictionary.NewBuilder();
            var symbolRows = phonesTxt
                .Select(line => line.Trim().Split())
                .Where(parts => parts.Length == 2);
            foreach (var parts in symbolRows) {
                builder.AddSymbol(prepPhoneme(parts[0]), parts[1]);
            }
            foreach (var lines in entryTexts) {
                AddEntries(lines);
            }

            // NOTE(review): nothing in this method creates an InferenceSession,
            // so the value returned here is whatever the static field already
            // holds (null on first use). Confirm that model-based prediction is
            // meant to be unavailable for this pack.
            return Tuple.Create((IG2p) builder.Build(), session);

            // Adds every "grapheme<separator>phonemes" line of one file to the builder.
            void AddEntries(string[] lines) {
                var rows = lines
                    .Where(line => !line.StartsWith(";;;", StringComparison.Ordinal))
                    .Select(line => line.Trim().Split(entrySeparator, StringSplitOptions.None))
                    .Where(parts => parts.Length == 2);
                foreach (var parts in rows) {
                    builder.AddEntry(
                        prepGrapheme(parts[0]),
                        parts[1].Split().Select(symbol => prepPhoneme(symbol)));
                }
            }
        }

        /// <summary>
        /// Loads the shared static state on first use, under a lock, then wires
        /// it into this instance's base-class properties.
        /// </summary>
        public JapaneseMonophoneG2p() {
            lock (lockObj) {
                if (graphemeIndexes == null) {
                    var indexes = graphemes
                        .Skip(PlaceholderCount)
                        .Select((g, i) => Tuple.Create(g, i))
                        .ToDictionary(t => t.Item1, t => t.Item2 + PlaceholderCount);
                    var pack = LoadPack(Data.Resources.g2p_ja_mono);
                    dict = pack.Item1;
                    session = pack.Item2;
                    // Set the marker only after loading succeeded, so a failed
                    // load is retried by the next instance instead of leaving
                    // every later instance with a null dictionary.
                    graphemeIndexes = indexes;
                }
            }
            GraphemeIndexes = graphemeIndexes;
            Phonemes = phonemes;
            Session = session;
            Dict = dict;
            PredCache = predCache;
        }
    }
}