Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Japanese monophone G2P (tailored to AI voicebanks/phonemizers) + add support to Diffsinger Japanese Phonemizer #1147

Merged
merged 5 commits into from
Jun 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,40 @@
using System;
using System.Collections.Generic;

using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core.G2p;

namespace OpenUtau.Core.DiffSinger {
[Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")]
public class DiffSingerJapanesePhonemizer : DiffSingerBasePhonemizer {
public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer {
/// <summary>
/// Returns the dictionary file name for this phonemizer: <c>dsdict-ja.yaml</c>.
/// </summary>
protected override string GetDictionaryName() {
    return "dsdict-ja.yaml";
}
/// <summary>
/// Creates the base G2p for this phonemizer, a new <see cref="JapaneseMonophoneG2p"/>.
/// </summary>
protected override IG2p LoadBaseG2p() {
    return new JapaneseMonophoneG2p();
}
/// <summary>
/// Lists the base G2p symbols this phonemizer reports as vowels.
/// </summary>
/// <returns>A freshly allocated array on every call.</returns>
protected override string[] GetBaseG2pVowels() {
    var vowels = new[] {
        "A", "AP", "E", "I", "N", "O", "SP", "U",
        "a", "e", "i", "o", "u",
    };
    return vowels;
}

/// <summary>
/// Lists the base G2p symbols this phonemizer reports as consonants.
/// </summary>
/// <returns>A freshly allocated array on every call.</returns>
protected override string[] GetBaseG2pConsonants() {
    var consonants = new[] {
        "b", "by", "ch", "cl", "d", "dy", "f", "g", "gw", "gy", "h", "hy",
        "j", "k", "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py",
        "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z",
    };
    return consonants;
}

/// <summary>
/// Produces the phonemes for a note group from the already computed part result.
/// </summary>
/// <remarks>
/// A first note whose lyric is "-" yields a single "SP" phoneme via
/// <c>MakeSimpleResult</c>. Otherwise the phonemes stored in <c>partResult</c>
/// under the first note's position are converted one-to-one into
/// <c>Phoneme</c> values (Item1 becomes the phoneme, Item2 the position).
/// </remarks>
/// <exception cref="Exception">
/// Thrown when <c>partResult</c> has no entry for the first note's position.
/// </exception>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
    if (notes[0].lyric == "-") {
        return MakeSimpleResult("SP");
    }

    if (!partResult.TryGetValue(notes[0].position, out var timedPhonemes)) {
        throw new Exception("Part result not found");
    }

    // Explicit loop instead of a Select projection; order of entries is kept.
    var converted = new List<Phoneme>();
    foreach (var entry in timedPhonemes) {
        converted.Add(new Phoneme() {
            phoneme = entry.Item1,
            position = entry.Item2,
        });
    }

    return new Result {
        phonemes = converted.ToArray(),
    };
}
}
}
26 changes: 18 additions & 8 deletions OpenUtau.Core/G2p/Data/Resources.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions OpenUtau.Core/G2p/Data/Resources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@
<data name="g2p-it" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-it.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="g2p-ja-mono" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-ja-mono.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="g2p-jyutping" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-jyutping.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
Expand Down
Binary file added OpenUtau.Core/G2p/Data/g2p-ja-mono.zip
Binary file not shown.
126 changes: 126 additions & 0 deletions OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using OpenUtau.Api;
using OpenUtau.Core.Util;

namespace OpenUtau.Core.G2p {
public class JapaneseMonophoneG2p : G2pPack {
// Grapheme table. The array position of each entry is significant: the
// constructor skips the first four entries and maps every remaining grapheme
// to its index in this array, so entries must not be reordered, inserted, or
// removed casually.
// The four leading empty strings are placeholders that keep the later indexes
// aligned (presumably reserved symbols of the paired model - TODO confirm).
// Contents as listed: ASCII letters, hiragana, katakana, and a few extra
// symbols at the end.
private static readonly string[] graphemes = new string[] {
"", "", "", "", "a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "m", "n", "o", "p", "r", "s",
"t", "u", "v", "w", "y", "z", "あ", "い", "う", "え",
"お", "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "か", "き", "く",
"け", "こ", "さ", "し", "す", "せ", "そ", "ざ", "じ", "ず",
"ぜ", "ぞ", "た", "ち", "つ", "て", "と", "だ", "ぢ", "づ", "で",
"ど", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ",
"ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま",
"み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら",
"り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ","゜",
"ゐ", "ゑ", "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ",
"ォ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ",
"ザ", "ジ", "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ",
"ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ",
"フ", "ヘ", "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ",
"ペ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ",
"ュ", "ョ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ",
"ヰ", "ヱ", "息", "吸", "-", "R"
};

// Phoneme table handed to the base class through the Phonemes property in the
// constructor. Like the grapheme table it starts with four empty placeholder
// entries (presumably reserved symbols of the paired model - TODO confirm),
// so the position of each entry should be treated as significant.
private static readonly string[] phonemes = new string[] {
"", "", "", "", "A", "AP", "E", "I", "N", "O", "U",
"SP", "a", "b", "ch", "cl", "d", "dy", "e", "f", "g", "gw",
"gy", "h", "hy", "i", "j", "k", "kw", "ky", "m", "my", "n",
"ng", "ny", "o", "p", "py", "r", "ry", "s", "sh", "t", "ts",
"ty", "u", "v", "w", "y", "z"
};

// Shared state, initialised once by the first constructor call while holding
// lockObj and then reused by every instance.

// Guards the one-time initialisation; readonly so the monitor object can
// never be swapped out from under a waiting thread.
private static readonly object lockObj = new object();
// Grapheme -> index map; also serves as the "already initialised" marker.
private static Dictionary<string, int> graphemeIndexes;
// The constructor stores the same loaded dictionary in all four fields.
private static IG2p hiragana;
private static IG2p katakana;
private static IG2p romaji;
private static IG2p special;
// Receives the second item of the tuple returned by LoadPack.
private static InferenceSession session;
// Cache instance shared by all instances through the PredCache property.
private static readonly Dictionary<string, string[]> predCache = new Dictionary<string, string[]>();

/// <summary>
/// Builds the lookup dictionary for this G2p from a zipped resource pack.
/// </summary>
/// <remarks>
/// Reads <c>hiragana.txt</c>, <c>katakana.txt</c>, <c>romaji.txt</c>,
/// <c>special.txt</c> and <c>phones.txt</c> from the archive. Symbols from
/// <c>phones.txt</c> are registered first, then the entries of the four
/// dictionary files are added in the order listed above.
/// This method intentionally hides <c>G2pPack.LoadPack</c>; the <c>new</c>
/// modifier makes that explicit.
/// </remarks>
/// <param name="data">Raw bytes of the zip archive.</param>
/// <param name="prepGrapheme">
/// Optional transform applied to each grapheme key; identity when null.
/// </param>
/// <param name="prepPhoneme">
/// Optional transform applied to each phoneme symbol; identity when null.
/// </param>
/// <returns>
/// The built dictionary paired with the current value of the static
/// <c>session</c> field. This method does not create an inference session.
/// </returns>
protected new Tuple<IG2p, InferenceSession> LoadPack(
    byte[] data,
    Func<string, string>? prepGrapheme = null,
    Func<string, string>? prepPhoneme = null) {
    // Non-nullable locals so the lambdas below capture a value the compiler
    // knows is never null.
    Func<string, string> mapGrapheme = prepGrapheme ?? ((string s) => s);
    Func<string, string> mapPhoneme = prepPhoneme ?? ((string s) => s);

    // Extract everything up front, in the original order, so a missing file
    // fails before any symbol or entry has been added.
    string[] hiraganaTxt = Zip.ExtractText(data, "hiragana.txt");
    string[] katakanaTxt = Zip.ExtractText(data, "katakana.txt");
    string[] romajiTxt = Zip.ExtractText(data, "romaji.txt");
    string[] specialTxt = Zip.ExtractText(data, "special.txt");
    string[] phonesTxt = Zip.ExtractText(data, "phones.txt");

    var builder = G2pDictionary.NewBuilder();

    // phones.txt: whitespace-separated pairs; lines with any other field
    // count are ignored.
    foreach (var line in phonesTxt) {
        var parts = line.Trim().Split();
        if (parts.Length == 2) {
            builder.AddSymbol(mapPhoneme(parts[0]), parts[1]);
        }
    }

    // Adds every "<grapheme><separator><phonemes>" line of one dictionary
    // file; lines starting with ";;;" are skipped.
    void AddEntries(string[] lines) {
        foreach (var line in lines) {
            if (line.StartsWith(";;;")) {
                continue;
            }
            // NOTE(review): the separator literal is reproduced exactly as it
            // appears in this file. With this literal, any line that splits
            // into more than two fields is dropped by the length check below;
            // confirm it matches the delimiter actually used in the data files.
            var parts = line.Trim().Split(new string[] { " " }, StringSplitOptions.None);
            if (parts.Length != 2) {
                continue;
            }
            builder.AddEntry(
                mapGrapheme(parts[0]),
                parts[1].Split().Select(symbol => mapPhoneme(symbol)));
        }
    }

    AddEntries(hiraganaTxt);
    AddEntries(katakanaTxt);
    AddEntries(romajiTxt);
    AddEntries(specialTxt);

    var dict = builder.Build();
    return Tuple.Create((IG2p) dict, session);
}

/// <summary>
/// Creates a Japanese monophone G2p. The first construction loads the shared
/// static data under a lock; every instance then points its inherited
/// properties at that shared data.
/// </summary>
public JapaneseMonophoneG2p() {
    lock (lockObj) {
        if (graphemeIndexes == null) {
            // Map each grapheme to its position in the full table; the four
            // leading placeholder entries are skipped, hence the + 4.
            var indexes = graphemes
                .Skip(4)
                .Select((g, i) => Tuple.Create(g, i))
                .ToDictionary(t => t.Item1, t => t.Item2 + 4);
            var pack = LoadPack(Data.Resources.g2p_ja_mono);
            // One dictionary holds the entries of all four source files, so
            // every field refers to the same object.
            hiragana = pack.Item1;
            katakana = pack.Item1;
            romaji = pack.Item1;
            special = pack.Item1;
            session = pack.Item2;
            // Publish the guard value last: if anything above throws, the
            // next construction retries instead of running with null state.
            graphemeIndexes = indexes;
        }
    }
    GraphemeIndexes = graphemeIndexes;
    Phonemes = phonemes;
    Session = session;
    // hiragana, katakana, romaji and special all reference the same
    // dictionary, so a single assignment is enough.
    Dict = special;
    PredCache = predCache;
}
}
}
Loading