From eea982eb905756e9273077d451001f07abe67e58 Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:27:12 +0800 Subject: [PATCH 1/3] MonophonePhonemizer todo:custom phoneme set support --- .../ChineseCVVMonophonePhonemizer.cs | 117 ++++++++++++++++++ .../MonophonePhonemizer.cs | 19 +++ 2 files changed, 136 insertions(+) create mode 100644 OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs create mode 100644 OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs diff --git a/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs new file mode 100644 index 000000000..fb1b9bf63 --- /dev/null +++ b/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs @@ -0,0 +1,117 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using OpenUtau.Api; +using Serilog; + +namespace OpenUtau.Plugin.Builtin +{ + [Phonemizer("Chinese CVV Monophone Phonemizer", "ZH CVV MONO", "O3", language: "ZH")] + public class ChineseCVVMonophonePhonemizer : MonophonePhonemizer + { + static readonly string pinyins = "a,ai,an,ang,ao,ba,bai,ban,bang,bao,bei,ben,beng,bi,bian,biao,bie,bin,bing,bo,bu,ca,cai,can,cang,cao,ce,cei,cen,ceng,cha,chai,chan,chang,chao,che,chen,cheng,chi,chong,chou,chu,chua,chuai,chuan,chuang,chui,chun,chuo,ci,cong,cou,cu,cuan,cui,cun,cuo,da,dai,dan,dang,dao,de,dei,den,deng,di,dia,dian,diao,die,ding,diu,dong,dou,du,duan,dui,dun,duo,e,ei,en,eng,er,fa,fan,fang,fei,fen,feng,fo,fou,fu,ga,gai,gan,gang,gao,ge,gei,gen,geng,gong,gou,gu,gua,guai,guan,guang,gui,gun,guo,ha,hai,han,hang,hao,he,hei,hen,heng,hong,hou,hu,hua,huai,huan,huang,hui,hun,huo,ji,jia,jian,jiang,jiao,jie,jin,jing,jiong,jiu,ju,jv,juan,jvan,jue,jve,jun,jvn,ka,kai,kan,kang,kao,ke,kei,ken,keng,kong,kou,ku,kua,kuai,kuan,kuang,kui,kun,kuo,la,lai,lan,lang,lao,le,lei,leng,li,lia,lian,liang,liao,lie,lin,ling,liu,lo,long,lou,lu,luan,lun,luo,lv,lve,ma,mai,man,mang,mao,me,mei,men,meng,mi,mian,miao,mie,min,ming,miu,mo,mou,mu,na,nai,nan,nang,nao,ne,nei,nen,neng,ni,nian,niang,niao,nie,nin,ning,niu,nong,nou,nu,nuan,nun,nuo,nv,nve,o,ou,pa,pai,pan,pang,pao,pei,pen,peng,pi,pian,piao,pie,pin,ping,po,pou,pu,qi,qia,qian,qiang,qiao,qie,qin,qing,qiong,qiu,qu,qv,quan,qvan,que,qve,qun,qvn,ran,rang,rao,re,ren,reng,ri,rong,rou,ru,rua,ruan,rui,run,ruo,sa,sai,san,sang,sao,se,sen,seng,sha,shai,shan,shang,shao,she,shei,shen,sheng,shi,shou,shu,shua,shuai,shuan,shuang,shui,shun,shuo,si,song,sou,su,suan,sui,sun,suo,ta,tai,tan,tang,tao,te,tei,teng,ti,tian,tiao,tie,ting,tong,tou,tu,tuan,tui,tun,tuo,wa,wai,wan,wang,wei,wen,weng,wo,wu,xi,xia,xian,xiang,xiao,xie,xin,xing,xiong,xiu,xu,xv,xuan,xvan,xue,xve,xun,xvn,ya,yan,yang,yao,ye,yi,yin,ying,yo,yong,you,yu,yv,yuan,yvan,yue,yve,yun,yvn,za,zai,zan,zang,zao,ze,zei,zen,zeng,zha,zhai,zhan,zhang,zhao,zhe,zhei,zhen,zheng,zhi,zhong,zhou,zhu,zhua,zhuai,zhuan,zhuang,zhui,zhun,zhuo,zi,zong,zou,zu,zuan,zui,zun"; + static readonly string tails = "_vn,_ing,_ong,_an,_ou,_er,_ao,_eng,_ang,_en,_en2,_ai,_iong,_in,_ei"; + + static readonly string[] pinyinList = pinyins.Split(','); + static readonly string[] tailList = tails.Split(','); + + protected override IG2p LoadBaseDictionary() { + var g2ps = new List(); + + // Load dictionary from plugin folder. + string path = Path.Combine(PluginDir, "zhcvv.yaml"); + if (File.Exists(path)) { + g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); + } + + // Load dictionary from singer folder. + if (singer != null && singer.Found && singer.Loaded) { + string file = Path.Combine(singer.Location, "zhcvv.yaml"); + if (File.Exists(file)) { + try { + g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); + } catch (Exception e) { + Log.Error(e, $"Failed to load {file}"); + } + } + } + g2ps.Add(new ChineseCVVG2p()); + return new G2pFallbacks(g2ps.ToArray()); + } + + protected override string[] GetVowels(){ + return pinyinList; + } + + protected override string[] GetConsonants(){ + return tailList; + } + + Dictionary AliasesFallback = new Dictionary{{"_un","_en"}}; + protected override Dictionary GetAliasesFallback()=>AliasesFallback; + protected override Dictionary GetDictionaryPhonemesReplacement() => new Dictionary(); + } + class ChineseCVVG2p : IG2p{ + /// + /// The consonant table. + /// + static readonly string consonants = "b,p,m,f,d,t,n,l,g,k,h,j,q,x,z,c,s,zh,ch,sh,r,y,w"; + /// + /// The vowel split table. + /// + static readonly string vowels = "ai=_ai,uai=_uai,an=_an,ian=_en2,uan=_an,van=_en2,ang=_ang,iang=_ang,uang=_ang,ao=_ao,iao=_ao,ou=_ou,iu=_ou,ong=_ong,iong=_ong,ei=_ei,ui=_ei,uei=_ei,en=_en,un=_un,uen=_un,eng=_eng,in=_in,ing=_ing,vn=_vn"; + + static HashSet cSet; + static Dictionary vDict; + + static ChineseCVVG2p() { + cSet = new HashSet(consonants.Split(',')); + vDict = vowels.Split(',') + .Select(s => s.Split('=')) + .ToDictionary(a => a[0], a => a[1]); + } + + public bool IsVowel(string phoneme){ + return !phoneme.StartsWith("_"); + } + + public string[] Query(string lyric){ + string consonant = string.Empty; + string vowel = string.Empty; + if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) { + // First try to find consonant "zh", "ch" or "sh", and extract vowel. + consonant = lyric.Substring(0, 2); + vowel = lyric.Substring(2); + } else if (lyric.Length > 1 && cSet.Contains(lyric.Substring(0, 1))) { + // Then try to find single character consonants, and extract vowel. + consonant = lyric.Substring(0, 1); + vowel = lyric.Substring(1); + } else { + // Otherwise the lyric is a vowel. + vowel = lyric; + } + if ((vowel == "un" || vowel == "uan") && (consonant == "j" || consonant == "q" || consonant == "x" || consonant == "y")) { + vowel = "v" + vowel.Substring(1); + } + if ((vowel == "an") && (consonant == "y")) { + vowel = "ian"; + } + if(vDict.TryGetValue(vowel, out var tail)){ + return new string[] { lyric, tail }; + }else{ + return new string[] { lyric }; + } + + } + public bool IsValidSymbol(string symbol){ + return true; + } + + public string[] UnpackHint(string hint, char separator = ' ') { + return hint.Split(separator) + .ToArray(); + } + } +} \ No newline at end of file diff --git a/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs b/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs new file mode 100644 index 000000000..308664b03 --- /dev/null +++ b/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; + +namespace OpenUtau.Plugin.Builtin +{ + public abstract class MonophonePhonemizer : SyllableBasedPhonemizer + { + protected override string GetDictionaryName() => ""; + protected override List ProcessSyllable(Syllable syllable){ + return syllable.cc.Append(syllable.v).ToList(); + } + + protected override List ProcessEnding(Ending ending){ + return ending.cc.ToList(); + } + } +} \ No newline at end of file From 7faf722ab34e175f28cb40b77b1f548fd6389789 Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Fri, 14 Jul 2023 10:33:49 +0800 Subject: [PATCH 2/3] Chinese CVV: custom dictionary support --- .../ChineseCVVMonophonePhonemizer.cs | 117 ---------- .../ChineseCVVPhonemizer.cs | 138 +++++------ .../LatinDiphonePhonemizer.cs | 203 +---------------- .../MonophonePhonemizer.cs | 27 ++- .../PhonemeBasedPhonemizer.cs | 215 ++++++++++++++++++ 5 files changed, 306 insertions(+), 394 deletions(-) delete mode 100644 OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs create mode 100644 OpenUtau.Plugin.Builtin/PhonemeBasedPhonemizer.cs diff --git a/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs deleted file mode 100644 index fb1b9bf63..000000000 --- a/OpenUtau.Plugin.Builtin/ChineseCVVMonophonePhonemizer.cs +++ /dev/null @@ -1,117 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading.Tasks; -using OpenUtau.Api; -using Serilog; - -namespace OpenUtau.Plugin.Builtin -{ - [Phonemizer("Chinese CVV Monophone Phonemizer", "ZH CVV MONO", "O3", language: "ZH")] - public class ChineseCVVMonophonePhonemizer : MonophonePhonemizer - { - static readonly string pinyins = "a,ai,an,ang,ao,ba,bai,ban,bang,bao,bei,ben,beng,bi,bian,biao,bie,bin,bing,bo,bu,ca,cai,can,cang,cao,ce,cei,cen,ceng,cha,chai,chan,chang,chao,che,chen,cheng,chi,chong,chou,chu,chua,chuai,chuan,chuang,chui,chun,chuo,ci,cong,cou,cu,cuan,cui,cun,cuo,da,dai,dan,dang,dao,de,dei,den,deng,di,dia,dian,diao,die,ding,diu,dong,dou,du,duan,dui,dun,duo,e,ei,en,eng,er,fa,fan,fang,fei,fen,feng,fo,fou,fu,ga,gai,gan,gang,gao,ge,gei,gen,geng,gong,gou,gu,gua,guai,guan,guang,gui,gun,guo,ha,hai,han,hang,hao,he,hei,hen,heng,hong,hou,hu,hua,huai,huan,huang,hui,hun,huo,ji,jia,jian,jiang,jiao,jie,jin,jing,jiong,jiu,ju,jv,juan,jvan,jue,jve,jun,jvn,ka,kai,kan,kang,kao,ke,kei,ken,keng,kong,kou,ku,kua,kuai,kuan,kuang,kui,kun,kuo,la,lai,lan,lang,lao,le,lei,leng,li,lia,lian,liang,liao,lie,lin,ling,liu,lo,long,lou,lu,luan,lun,luo,lv,lve,ma,mai,man,mang,mao,me,mei,men,meng,mi,mian,miao,mie,min,ming,miu,mo,mou,mu,na,nai,nan,nang,nao,ne,nei,nen,neng,ni,nian,niang,niao,nie,nin,ning,niu,nong,nou,nu,nuan,nun,nuo,nv,nve,o,ou,pa,pai,pan,pang,pao,pei,pen,peng,pi,pian,piao,pie,pin,ping,po,pou,pu,qi,qia,qian,qiang,qiao,qie,qin,qing,qiong,qiu,qu,qv,quan,qvan,que,qve,qun,qvn,ran,rang,rao,re,ren,reng,ri,rong,rou,ru,rua,ruan,rui,run,ruo,sa,sai,san,sang,sao,se,sen,seng,sha,shai,shan,shang,shao,she,shei,shen,sheng,shi,shou,shu,shua,shuai,shuan,shuang,shui,shun,shuo,si,song,sou,su,suan,sui,sun,suo,ta,tai,tan,tang,tao,te,tei,teng,ti,tian,tiao,tie,ting,tong,tou,tu,tuan,tui,tun,tuo,wa,wai,wan,wang,wei,wen,weng,wo,wu,xi,xia,xian,xiang,xiao,xie,xin,xing,xiong,xiu,xu,xv,xuan,xvan,xue,xve,xun,xvn,ya,yan,yang,yao,ye,yi,yin,ying,yo,yong,you,yu,yv,yuan,yvan,yue,yve,yun,yvn,za,zai,zan,zang,zao,ze,zei,zen,zeng,zha,zhai,zhan,zhang,zhao,zhe,zhei,zhen,zheng,zhi,zhong,zhou,zhu,zhua,zhuai,zhuan,zhuang,zhui,zhun,zhuo,zi,zong,zou,zu,zuan,zui,zun"; - static readonly string tails = "_vn,_ing,_ong,_an,_ou,_er,_ao,_eng,_ang,_en,_en2,_ai,_iong,_in,_ei"; - - static readonly string[] pinyinList = pinyins.Split(','); - static readonly string[] tailList = tails.Split(','); - - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - - // Load dictionary from plugin folder. - string path = Path.Combine(PluginDir, "zhcvv.yaml"); - if (File.Exists(path)) { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - } - - // Load dictionary from singer folder. - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, "zhcvv.yaml"); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(new ChineseCVVG2p()); - return new G2pFallbacks(g2ps.ToArray()); - } - - protected override string[] GetVowels(){ - return pinyinList; - } - - protected override string[] GetConsonants(){ - return tailList; - } - - Dictionary AliasesFallback = new Dictionary{{"_un","_en"}}; - protected override Dictionary GetAliasesFallback()=>AliasesFallback; - protected override Dictionary GetDictionaryPhonemesReplacement() => new Dictionary(); - } - class ChineseCVVG2p : IG2p{ - /// - /// The consonant table. - /// - static readonly string consonants = "b,p,m,f,d,t,n,l,g,k,h,j,q,x,z,c,s,zh,ch,sh,r,y,w"; - /// - /// The vowel split table. - /// - static readonly string vowels = "ai=_ai,uai=_uai,an=_an,ian=_en2,uan=_an,van=_en2,ang=_ang,iang=_ang,uang=_ang,ao=_ao,iao=_ao,ou=_ou,iu=_ou,ong=_ong,iong=_ong,ei=_ei,ui=_ei,uei=_ei,en=_en,un=_un,uen=_un,eng=_eng,in=_in,ing=_ing,vn=_vn"; - - static HashSet cSet; - static Dictionary vDict; - - static ChineseCVVG2p() { - cSet = new HashSet(consonants.Split(',')); - vDict = vowels.Split(',') - .Select(s => s.Split('=')) - .ToDictionary(a => a[0], a => a[1]); - } - - public bool IsVowel(string phoneme){ - return !phoneme.StartsWith("_"); - } - - public string[] Query(string lyric){ - string consonant = string.Empty; - string vowel = string.Empty; - if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) { - // First try to find consonant "zh", "ch" or "sh", and extract vowel. - consonant = lyric.Substring(0, 2); - vowel = lyric.Substring(2); - } else if (lyric.Length > 1 && cSet.Contains(lyric.Substring(0, 1))) { - // Then try to find single character consonants, and extract vowel. - consonant = lyric.Substring(0, 1); - vowel = lyric.Substring(1); - } else { - // Otherwise the lyric is a vowel. - vowel = lyric; - } - if ((vowel == "un" || vowel == "uan") && (consonant == "j" || consonant == "q" || consonant == "x" || consonant == "y")) { - vowel = "v" + vowel.Substring(1); - } - if ((vowel == "an") && (consonant == "y")) { - vowel = "ian"; - } - if(vDict.TryGetValue(vowel, out var tail)){ - return new string[] { lyric, tail }; - }else{ - return new string[] { lyric }; - } - - } - public bool IsValidSymbol(string symbol){ - return true; - } - - public string[] UnpackHint(string hint, char separator = ' ') { - return hint.Split(separator) - .ToArray(); - } - } -} \ No newline at end of file diff --git a/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs index 8a6b902b5..dcac65e18 100644 --- a/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs @@ -1,16 +1,62 @@ +using System; using System.Collections.Generic; +using System.IO; using System.Linq; +using System.Threading.Tasks; using OpenUtau.Api; -using OpenUtau.Core; -using OpenUtau.Core.Ustx; +using Serilog; -namespace OpenUtau.Plugin.Builtin { +namespace OpenUtau.Plugin.Builtin +{ /// /// Chinese 十月式整音扩张 CVV Phonemizer. /// It works by spliting "duang" to "duang" + "_ang", to produce the proper tail sound. /// [Phonemizer("Chinese CVV (十月式整音扩张) Phonemizer", "ZH CVV", language: "ZH")] - public class ChineseCVVPhonemizer : BaseChinesePhonemizer { + public class ChineseCVVMonophonePhonemizer : MonophonePhonemizer + { + static readonly string pinyins = "a,ai,an,ang,ao,ba,bai,ban,bang,bao,bei,ben,beng,bi,bian,biao,bie,bin,bing,bo,bu,ca,cai,can,cang,cao,ce,cei,cen,ceng,cha,chai,chan,chang,chao,che,chen,cheng,chi,chong,chou,chu,chua,chuai,chuan,chuang,chui,chun,chuo,ci,cong,cou,cu,cuan,cui,cun,cuo,da,dai,dan,dang,dao,de,dei,den,deng,di,dia,dian,diao,die,ding,diu,dong,dou,du,duan,dui,dun,duo,e,ei,en,eng,er,fa,fan,fang,fei,fen,feng,fo,fou,fu,ga,gai,gan,gang,gao,ge,gei,gen,geng,gong,gou,gu,gua,guai,guan,guang,gui,gun,guo,ha,hai,han,hang,hao,he,hei,hen,heng,hong,hou,hu,hua,huai,huan,huang,hui,hun,huo,ji,jia,jian,jiang,jiao,jie,jin,jing,jiong,jiu,ju,jv,juan,jvan,jue,jve,jun,jvn,ka,kai,kan,kang,kao,ke,kei,ken,keng,kong,kou,ku,kua,kuai,kuan,kuang,kui,kun,kuo,la,lai,lan,lang,lao,le,lei,leng,li,lia,lian,liang,liao,lie,lin,ling,liu,lo,long,lou,lu,luan,lun,luo,lv,lve,ma,mai,man,mang,mao,me,mei,men,meng,mi,mian,miao,mie,min,ming,miu,mo,mou,mu,na,nai,nan,nang,nao,ne,nei,nen,neng,ni,nian,niang,niao,nie,nin,ning,niu,nong,nou,nu,nuan,nun,nuo,nv,nve,o,ou,pa,pai,pan,pang,pao,pei,pen,peng,pi,pian,piao,pie,pin,ping,po,pou,pu,qi,qia,qian,qiang,qiao,qie,qin,qing,qiong,qiu,qu,qv,quan,qvan,que,qve,qun,qvn,ran,rang,rao,re,ren,reng,ri,rong,rou,ru,rua,ruan,rui,run,ruo,sa,sai,san,sang,sao,se,sen,seng,sha,shai,shan,shang,shao,she,shei,shen,sheng,shi,shou,shu,shua,shuai,shuan,shuang,shui,shun,shuo,si,song,sou,su,suan,sui,sun,suo,ta,tai,tan,tang,tao,te,tei,teng,ti,tian,tiao,tie,ting,tong,tou,tu,tuan,tui,tun,tuo,wa,wai,wan,wang,wei,wen,weng,wo,wu,xi,xia,xian,xiang,xiao,xie,xin,xing,xiong,xiu,xu,xv,xuan,xvan,xue,xve,xun,xvn,ya,yan,yang,yao,ye,yi,yin,ying,yo,yong,you,yu,yv,yuan,yvan,yue,yve,yun,yvn,za,zai,zan,zang,zao,ze,zei,zen,zeng,zha,zhai,zhan,zhang,zhao,zhe,zhei,zhen,zheng,zhi,zhong,zhou,zhu,zhua,zhuai,zhuan,zhuang,zhui,zhun,zhuo,zi,zong,zou,zu,zuan,zui,zun"; + static readonly string tails = "_vn,_ing,_ong,_an,_ou,_er,_ao,_eng,_ang,_en,_en2,_ai,_iong,_in,_ei"; + + static readonly string[] pinyinList = pinyins.Split(','); + static readonly string[] tailList = tails.Split(','); + + public ChineseCVVMonophonePhonemizer() { + ConsonantLength = 120; + } + + protected override IG2p LoadG2p() { + var g2ps = new List(); + + // Load dictionary from plugin folder. + string path = Path.Combine(PluginDir, "zhcvv.yaml"); + if (File.Exists(path)) { + g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); + } + + // Load dictionary from singer folder. + if (singer != null && singer.Found && singer.Loaded) { + string file = Path.Combine(singer.Location, "zhcvv.yaml"); + if (File.Exists(file)) { + try { + g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); + } catch (Exception e) { + Log.Error(e, $"Failed to load {file}"); + } + } + } + g2ps.Add(new ChineseCVVG2p()); + return new G2pFallbacks(g2ps.ToArray()); + } + + protected override Dictionary LoadVowelFallbacks() { + return "_un=_en".Split(';') + .Select(entry => entry.Split('=')) + .ToDictionary(parts => parts[0], parts => parts[1].Split(',')); + } + } + + class ChineseCVVG2p : IG2p{ /// /// The consonant table. /// @@ -22,26 +68,22 @@ public class ChineseCVVPhonemizer : BaseChinesePhonemizer { static HashSet cSet; static Dictionary vDict; - - static ChineseCVVPhonemizer() { + + static ChineseCVVG2p() { cSet = new HashSet(consonants.Split(',')); vDict = vowels.Split(',') .Select(s => s.Split('=')) .ToDictionary(a => a[0], a => a[1]); } - private USinger singer; - - // Simply stores the singer in a field. - public override void SetSinger(USinger singer) => this.singer = singer; + public bool IsVowel(string phoneme){ + return !phoneme.StartsWith("_"); + } - public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { + public string[] Query(string lyric){ // The overall logic is: // 1. Remove consonant: "duang" -> "uang". // 2. Lookup the trailing sound in vowel table: "uang" -> "_ang". - // 3. Split the total duration and returns "duang" and "_ang". - var lyric = notes[0].lyric; - var note = notes[0]; string consonant = string.Empty; string vowel = string.Empty; if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) { @@ -63,62 +105,20 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN if ((vowel == "an") && (consonant == "y")) { vowel = "ian"; } - string phoneme0 = lyric; - // Get color - string color = string.Empty; - int toneShift = 0; - if (note.phonemeAttributes != null) { - var attr = note.phonemeAttributes.FirstOrDefault(attr => attr.index == 0); - color = attr.voiceColor; - toneShift = attr.toneShift; + if(vDict.TryGetValue(vowel, out var tail)){ + return new string[] { lyric, tail }; + }else{ + return new string[] { lyric }; } - // We will need to split the total duration for phonemes, so we compute it here. - int totalDuration = notes.Sum(n => n.duration); - // Lookup the vowel split table. For example, "uang" will match "_ang". - if (vDict.TryGetValue(vowel, out var phoneme1)) { - // Now phoneme0="duang" and phoneme1="_ang", - // try to give "_ang" 120 ticks, but no more than half of the total duration. - int length1 = 120; - if (length1 > totalDuration / 2) { - length1 = totalDuration / 2; - } - if (singer.TryGetMappedOto(phoneme0, note.tone + toneShift, color, out var oto0)) { - phoneme0 = oto0.Alias; - } - - if (singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto1)) { - phoneme1 = oto1.Alias; - } - - if (phoneme1.Contains("_un") && !singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto2)) { - phoneme1 = "_en"; - } else if (phoneme1.Contains("_un") && singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto3)) { - phoneme1 = oto3.Alias; - } + + } + public bool IsValidSymbol(string symbol){ + return true; + } - return new Result { - phonemes = new Phoneme[] { - new Phoneme() { - phoneme = phoneme0, - }, - new Phoneme() { - phoneme = phoneme1, - position = totalDuration - length1, - } - }, - }; - } - if (singer.TryGetMappedOto(phoneme0, note.tone + toneShift, color, out var oto)) { - phoneme0 = oto.Alias; - } - // Not spliting is needed. Return as is. - return new Result { - phonemes = new Phoneme[] { - new Phoneme() { - phoneme = phoneme0, - } - }, - }; + public string[] UnpackHint(string hint, char separator = ' ') { + return hint.Split(separator) + .ToArray(); } } -} +} \ No newline at end of file diff --git a/OpenUtau.Plugin.Builtin/LatinDiphonePhonemizer.cs b/OpenUtau.Plugin.Builtin/LatinDiphonePhonemizer.cs index bab726230..56eef32e2 100644 --- a/OpenUtau.Plugin.Builtin/LatinDiphonePhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/LatinDiphonePhonemizer.cs @@ -10,172 +10,8 @@ namespace OpenUtau.Plugin.Builtin { /// /// A base diphone phonemizer for latin languages. /// - public abstract class LatinDiphonePhonemizer : Phonemizer { - protected Dictionary vowelFallback; - protected USinger singer; - protected IG2p g2p; - protected bool isDictionaryLoading; - - //[(index of phoneme, tick position from the lyrical note in notes[], is manual)] - protected readonly List> alignments = new List>(); - - /// - /// This property will later be exposed in UI for user adjustment. - /// - public int ConsonantLength { get; set; } = 60; - - public LatinDiphonePhonemizer() { - try { - Initialize(); - } catch (Exception e) { - Log.Error(e, "Failed to initialize."); - } - } - - protected abstract IG2p LoadG2p(); - - protected abstract Dictionary LoadVowelFallbacks(); - - protected void Initialize() { - g2p = LoadG2p(); - vowelFallback = LoadVowelFallbacks(); - } - - public override void SetSinger(USinger singer) { - this.singer = singer; - g2p = LoadG2p(); - } - - public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { - if (isDictionaryLoading) { - return MakeSimpleResult(""); - } - var note = notes[0]; - - // Force alias using ? prefix - if (!string.IsNullOrEmpty(note.lyric) && note.lyric[0] == '?') { - return MakeSimpleResult(note.lyric.Substring(1)); - } - - // Get the symbols of previous note. - var prevSymbols = prevNeighbour == null ? null : GetSymbols(prevNeighbour.Value); - // The user is using a tail "-" note to produce a " -" sound. - if (note.lyric == "-" && prevSymbols != null) { - var attr = note.phonemeAttributes?.FirstOrDefault() ?? default; - string color = attr.voiceColor; - string alias = $"{prevSymbols.Last()} -"; - if (singer.TryGetMappedOto(alias, note.tone, color, out var oto)) { - return MakeSimpleResult(oto.Alias); - } - return MakeSimpleResult(alias); - } - // Get the symbols of current note. - string[] symbols = GetSymbols(note); - if (nextNeighbour == null) { - // Auto add tail "-". - symbols = symbols.Append("-").ToArray(); - } - if (symbols == null || symbols.Length == 0) { - // No symbol is found for current note. - // Otherwise assumes the user put in an alias. - return MakeSimpleResult(note.lyric); - } - // Find phone types of symbols. - var isVowel = symbols.Select(s => g2p.IsVowel(s)).ToArray(); - // Arpasing aligns the first vowel at 0 and shifts leading consonants to negative positions, - // so we need to find the first vowel. - var phonemes = new Phoneme[symbols.Length]; - - // Alignments - // - Tries to align every note to one syllable. - // - "+n" manually aligns to n-th phoneme. - alignments.Clear(); - //notes except those whose lyrics start witn "+*" or "+~" - var nonExtensionNotes = notes.Where(n=>!IsSyllableVowelExtensionNote(n)).ToArray(); - for (int i = 0; i < symbols.Length; i++) { - if (isVowel[i] && alignments.Count < nonExtensionNotes.Length) { - alignments.Add(Tuple.Create(i, nonExtensionNotes[alignments.Count].position - notes[0].position, false)); - } - } - int position = notes[0].duration; - for (int i = 1; i < notes.Length; ++i) { - if (int.TryParse(notes[i].lyric.Substring(1), out var idx)) { - alignments.Add(Tuple.Create(idx - 1, position, true)); - } - position += notes[i].duration; - } - alignments.Add(Tuple.Create(phonemes.Length, position, true)); - alignments.Sort((a, b) => a.Item1.CompareTo(b.Item1)); - for (int i = 0; i < alignments.Count; ++i) { - if (alignments[i].Item3) { - while (i > 0 && (alignments[i - 1].Item2 >= alignments[i].Item2 || - alignments[i - 1].Item1 == alignments[i].Item1)) { - alignments.RemoveAt(i - 1); - i--; - } - while (i < alignments.Count - 1 && (alignments[i + 1].Item2 <= alignments[i].Item2 || - alignments[i + 1].Item1 == alignments[i].Item1)) { - alignments.RemoveAt(i + 1); - } - } - } - - int startIndex = 0; - int firstVowel = Array.IndexOf(isVowel, true); - int startTick = -ConsonantLength * firstVowel; - foreach (var alignment in alignments) { - // Distributes phonemes between two aligment points. - DistributeDuration(isVowel, phonemes, startIndex, alignment.Item1, startTick, alignment.Item2); - startIndex = alignment.Item1; - startTick = alignment.Item2; - } - alignments.Clear(); - - // Select aliases. - int noteIndex = 0; - string prevSymbol = prevSymbols == null ? "-" : prevSymbols.Last(); - for (int i = 0; i < symbols.Length; i++) { - var attr = note.phonemeAttributes?.FirstOrDefault(attr => attr.index == i) ?? default; - string alt = attr.alternate?.ToString() ?? string.Empty; - string color = attr.voiceColor; - int toneShift = attr.toneShift; - var phoneme = phonemes[i]; - while (noteIndex < notes.Length - 1 && notes[noteIndex].position - note.position < phoneme.position) { - noteIndex++; - } - int tone = (i == 0 && prevNeighbours != null && prevNeighbours.Length > 0) - ? prevNeighbours.Last().tone : notes[noteIndex].tone; - phoneme.phoneme = GetPhonemeOrFallback(prevSymbol, symbols[i], tone + toneShift, color, alt); - phonemes[i] = phoneme; - prevSymbol = symbols[i]; - } - - return new Result { - phonemes = phonemes, - }; - } - - /// - /// Does this note extend the previous syllable? - /// - /// - /// - protected bool IsSyllableVowelExtensionNote(Note note) { - return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); - } - - string[] GetSymbols(Note note) { - if (string.IsNullOrEmpty(note.phoneticHint)) { - // User has not provided hint, query CMUdict. - return g2p.Query(note.lyric.ToLowerInvariant()); - } - // Split space-separated symbols into an array. - return note.phoneticHint.Split() - .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. - .ToArray(); - } - - string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt) { + public abstract class LatinDiphonePhonemizer : PhonemeBasedPhonemizer { + protected override string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt) { if (!string.IsNullOrEmpty(alt) && singer.TryGetMappedOto($"{prevSymbol} {symbol}{alt}", tone, color, out var oto)) { return oto.Alias; } @@ -194,40 +30,5 @@ string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string c } return $"{prevSymbol} {symbol}{alt}"; } - - void DistributeDuration(bool[] isVowel, Phoneme[] phonemes, int startIndex, int endIndex, int startTick, int endTick) { - if (startIndex == endIndex) { - return; - } - // First count number of vowels and consonants. - int consonants = 0; - int vowels = 0; - int duration = endTick - startTick; - for (int i = startIndex; i < endIndex; i++) { - if (isVowel[i]) { - vowels++; - } else { - consonants++; - } - } - // If vowels exist, consonants are given fixed length, but combined no more than half duration. - // However, if no vowel exists, consonants are evenly distributed within the total duration. - int consonantDuration = vowels > 0 - ? (consonants > 0 ? Math.Min(ConsonantLength, duration / 2 / consonants) : 0) - : duration / consonants; - // Vowels are evenly distributed within (total duration - total consonant duration). - int vowelDuration = vowels > 0 ? (duration - consonantDuration * consonants) / vowels : 0; - int position = startTick; - // Compute positions using previously computed durations. - for (int i = startIndex; i < endIndex; i++) { - if (isVowel[i]) { - phonemes[i].position = position; - position += vowelDuration; - } else { - phonemes[i].position = position; - position += consonantDuration; - } - } - } } } diff --git a/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs b/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs index 308664b03..d0648ad22 100644 --- a/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/MonophonePhonemizer.cs @@ -2,18 +2,31 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using Serilog; namespace OpenUtau.Plugin.Builtin { - public abstract class MonophonePhonemizer : SyllableBasedPhonemizer + public abstract class MonophonePhonemizer : PhonemeBasedPhonemizer { - protected override string GetDictionaryName() => ""; - protected override List ProcessSyllable(Syllable syllable){ - return syllable.cc.Append(syllable.v).ToList(); + public MonophonePhonemizer(){ + addTail = false; } - - protected override List ProcessEnding(Ending ending){ - return ending.cc.ToList(); + + protected override string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt) { + if (!string.IsNullOrEmpty(alt) && singer.TryGetMappedOto($"{symbol}{alt}", tone, color, out var oto)) { + return oto.Alias; + } + if (singer.TryGetMappedOto(symbol, tone, color, out var oto1)) { + return oto1.Alias; + } + if (vowelFallback.TryGetValue(symbol, out string[] fallbacks)) { + foreach (var fallback in fallbacks) { + if (singer.TryGetMappedOto(fallback, tone, color, out var oto2)) { + return oto2.Alias; + } + } + } + return $"{symbol}{alt}"; } } } \ No newline at end of file diff --git a/OpenUtau.Plugin.Builtin/PhonemeBasedPhonemizer.cs b/OpenUtau.Plugin.Builtin/PhonemeBasedPhonemizer.cs new file mode 100644 index 000000000..e0ed68bd8 --- /dev/null +++ b/OpenUtau.Plugin.Builtin/PhonemeBasedPhonemizer.cs @@ -0,0 +1,215 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.Ustx; +using Serilog; + +namespace OpenUtau.Plugin.Builtin +{ + public abstract class PhonemeBasedPhonemizer : Phonemizer + { + protected Dictionary vowelFallback; + protected USinger singer; + protected IG2p g2p; + protected bool isDictionaryLoading; + + //[(index of phoneme, tick position from the lyrical note in notes[], is manual)] + protected readonly List> alignments = new List>(); + + /// + /// This property will later be exposed in UI for user adjustment. + /// + public int ConsonantLength { get; set; } = 60; + + public bool addTail { get; set; } = true; + + public PhonemeBasedPhonemizer() { + try { + Initialize(); + } catch (Exception e) { + Log.Error(e, "Failed to initialize."); + } + } + + protected abstract IG2p LoadG2p(); + + protected abstract Dictionary LoadVowelFallbacks(); + + protected void Initialize() { + g2p = LoadG2p(); + vowelFallback = LoadVowelFallbacks(); + } + + public override void SetSinger(USinger singer) { + this.singer = singer; + g2p = LoadG2p(); + } + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { + if (isDictionaryLoading) { + return MakeSimpleResult(""); + } + var note = notes[0]; + + // Force alias using ? prefix + if (!string.IsNullOrEmpty(note.lyric) && note.lyric[0] == '?') { + return MakeSimpleResult(note.lyric.Substring(1)); + } + + // Get the symbols of previous note. + var prevSymbols = prevNeighbour == null ? null : GetSymbols(prevNeighbour.Value); + // The user is using a tail "-" note to produce a " -" sound. + if (note.lyric == "-" && prevSymbols != null) { + var attr = note.phonemeAttributes?.FirstOrDefault() ?? default; + string color = attr.voiceColor; + string alias = $"{prevSymbols.Last()} -"; + if (singer.TryGetMappedOto(alias, note.tone, color, out var oto)) { + return MakeSimpleResult(oto.Alias); + } + return MakeSimpleResult(alias); + } + // Get the symbols of current note. + string[] symbols = GetSymbols(note); + if (addTail && nextNeighbour == null) { + // Auto add tail "-". + symbols = symbols.Append("-").ToArray(); + } + if (symbols == null || symbols.Length == 0) { + // No symbol is found for current note. + // Otherwise assumes the user put in an alias. + return MakeSimpleResult(note.lyric); + } + // Find phone types of symbols. + var isVowel = symbols.Select(s => g2p.IsVowel(s)).ToArray(); + // Arpasing aligns the first vowel at 0 and shifts leading consonants to negative positions, + // so we need to find the first vowel. + var phonemes = new Phoneme[symbols.Length]; + + // Alignments + // - Tries to align every note to one syllable. + // - "+n" manually aligns to n-th phoneme. + alignments.Clear(); + //notes except those whose lyrics start witn "+*" or "+~" + var nonExtensionNotes = notes.Where(n=>!IsSyllableVowelExtensionNote(n)).ToArray(); + for (int i = 0; i < symbols.Length; i++) { + if (isVowel[i] && alignments.Count < nonExtensionNotes.Length) { + alignments.Add(Tuple.Create(i, nonExtensionNotes[alignments.Count].position - notes[0].position, false)); + } + } + int position = notes[0].duration; + for (int i = 1; i < notes.Length; ++i) { + if (int.TryParse(notes[i].lyric.Substring(1), out var idx)) { + alignments.Add(Tuple.Create(idx - 1, position, true)); + } + position += notes[i].duration; + } + alignments.Add(Tuple.Create(phonemes.Length, position, true)); + alignments.Sort((a, b) => a.Item1.CompareTo(b.Item1)); + for (int i = 0; i < alignments.Count; ++i) { + if (alignments[i].Item3) { + while (i > 0 && (alignments[i - 1].Item2 >= alignments[i].Item2 || + alignments[i - 1].Item1 == alignments[i].Item1)) { + alignments.RemoveAt(i - 1); + i--; + } + while (i < alignments.Count - 1 && (alignments[i + 1].Item2 <= alignments[i].Item2 || + alignments[i + 1].Item1 == alignments[i].Item1)) { + alignments.RemoveAt(i + 1); + } + } + } + + int startIndex = 0; + int firstVowel = Array.IndexOf(isVowel, true); + int startTick = -ConsonantLength * firstVowel; + foreach (var alignment in alignments) { + // Distributes phonemes between two aligment points. + DistributeDuration(isVowel, phonemes, startIndex, alignment.Item1, startTick, alignment.Item2); + startIndex = alignment.Item1; + startTick = alignment.Item2; + } + alignments.Clear(); + + // Select aliases. + int noteIndex = 0; + string prevSymbol = prevSymbols == null ? "-" : prevSymbols.Last(); + for (int i = 0; i < symbols.Length; i++) { + var attr = note.phonemeAttributes?.FirstOrDefault(attr => attr.index == i) ?? default; + string alt = attr.alternate?.ToString() ?? string.Empty; + string color = attr.voiceColor; + int toneShift = attr.toneShift; + var phoneme = phonemes[i]; + while (noteIndex < notes.Length - 1 && notes[noteIndex].position - note.position < phoneme.position) { + noteIndex++; + } + int tone = (i == 0 && prevNeighbours != null && prevNeighbours.Length > 0) + ? prevNeighbours.Last().tone : notes[noteIndex].tone; + phoneme.phoneme = GetPhonemeOrFallback(prevSymbol, symbols[i], tone + toneShift, color, alt); + phonemes[i] = phoneme; + prevSymbol = symbols[i]; + } + + return new Result { + phonemes = phonemes, + }; + } + + /// + /// Does this note extend the previous syllable? + /// + /// + /// + protected bool IsSyllableVowelExtensionNote(Note note) { + return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); + } + + string[] GetSymbols(Note note) { + if (string.IsNullOrEmpty(note.phoneticHint)) { + // User has not provided hint, query CMUdict. + return g2p.Query(note.lyric.ToLowerInvariant()); + } + // Split space-separated symbols into an array. + return note.phoneticHint.Split() + .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. + .ToArray(); + } + + protected abstract string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt); + + void DistributeDuration(bool[] isVowel, Phoneme[] phonemes, int startIndex, int endIndex, int startTick, int endTick) { + if (startIndex == endIndex) { + return; + } + // First count number of vowels and consonants. + int consonants = 0; + int vowels = 0; + int duration = endTick - startTick; + for (int i = startIndex; i < endIndex; i++) { + if (isVowel[i]) { + vowels++; + } else { + consonants++; + } + } + // If vowels exist, consonants are given fixed length, but combined no more than half duration. + // However, if no vowel exists, consonants are evenly distributed within the total duration. + int consonantDuration = vowels > 0 + ? (consonants > 0 ? Math.Min(ConsonantLength, duration / 2 / consonants) : 0) + : duration / consonants; + // Vowels are evenly distributed within (total duration - total consonant duration). + int vowelDuration = vowels > 0 ? (duration - consonantDuration * consonants) / vowels : 0; + int position = startTick; + // Compute positions using previously computed durations. + for (int i = startIndex; i < endIndex; i++) { + if (isVowel[i]) { + phonemes[i].position = position; + position += vowelDuration; + } else { + phonemes[i].position = position; + position += consonantDuration; + } + } + } + } +} \ No newline at end of file From ff2198f44e0443453e8e62f67a1fd23516b571ec Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:42:05 +0800 Subject: [PATCH 3/3] Chinese CVV: hanzi to pinyin --- OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs index dcac65e18..28ffb1dc4 100644 --- a/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs @@ -2,8 +2,8 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Threading.Tasks; using OpenUtau.Api; +using OpenUtau.Core; using Serilog; namespace OpenUtau.Plugin.Builtin @@ -54,6 +54,10 @@ protected override Dictionary LoadVowelFallbacks() { .Select(entry => entry.Split('=')) .ToDictionary(parts => parts[0], parts => parts[1].Split(',')); } + + public override void SetUp(Note[][] groups) { + BaseChinesePhonemizer.RomanizeNotes(groups); + } } class ChineseCVVG2p : IG2p{