From 2cc6810b3795924ee2545adea620596ffce1b3a0 Mon Sep 17 00:00:00 2001 From: Lotte V Date: Sat, 18 May 2024 23:26:17 +0200 Subject: [PATCH 1/5] Add Japanese monophone G2P + add support to Diffsinger Japanese Phonemizer --- .../DiffSingerJapanesePhonemizer.cs | 14 +- OpenUtau.Core/G2p/Data/Resources.Designer.cs | 26 ++-- OpenUtau.Core/G2p/Data/Resources.resx | 3 + OpenUtau.Core/G2p/Data/g2p-ja-mono.zip | Bin 0 -> 11025 bytes OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs | 126 ++++++++++++++++++ 5 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 OpenUtau.Core/G2p/Data/g2p-ja-mono.zip create mode 100644 OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs index 344cad86e..c84637a50 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs @@ -1,10 +1,22 @@ using System.Collections.Generic; using OpenUtau.Api; +using OpenUtau.Core.G2p; namespace OpenUtau.Core.DiffSinger { [Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")] - public class DiffSingerJapanesePhonemizer : DiffSingerBasePhonemizer { + public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-ja.yaml"; + protected override IG2p LoadBaseG2p() => new JapaneseMonophoneG2p(); + protected override string[] GetBaseG2pVowels() => new string[] { + "A", "AP", "E", "I", "N", "O", "SP", "U", + "a", "e", "i", "o", "u" + }; + + protected override string[] GetBaseG2pConsonants() => new string[] { + "b", "by", "d", "dy", "f", "g", "gw", "gy", "h", "hy", "j", "k", + "kw", "ky", "m", "my", "n", "ny", "p", "py", "r", "ry", "s", "sh", + "t", "ts", "ty", "v", "w", "y", "z" + }; } } diff --git a/OpenUtau.Core/G2p/Data/Resources.Designer.cs b/OpenUtau.Core/G2p/Data/Resources.Designer.cs index ed9d3451e..97165a05b 100644 --- a/OpenUtau.Core/G2p/Data/Resources.Designer.cs +++ b/OpenUtau.Core/G2p/Data/Resources.Designer.cs @@ -120,6 +120,16 @@ internal static byte[] g2p_it { } } + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] g2p_ja_mono { + get { + object obj = ResourceManager.GetObject("g2p-ja-mono", resourceCulture); + return ((byte[])(obj)); + } + } + /// /// Looks up a localized resource of type System.Byte[]. /// @@ -133,9 +143,9 @@ internal static byte[] g2p_jyutping { /// /// Looks up a localized resource of type System.Byte[]. /// - internal static byte[] g2p_man { + internal static byte[] g2p_ko { get { - object obj = ResourceManager.GetObject("g2p-man", resourceCulture); + object obj = ResourceManager.GetObject("g2p-ko", resourceCulture); return ((byte[])(obj)); } } @@ -143,9 +153,9 @@ internal static byte[] g2p_man { /// /// Looks up a localized resource of type System.Byte[]. /// - internal static byte[] g2p_pt { + internal static byte[] g2p_man { get { - object obj = ResourceManager.GetObject("g2p-pt", resourceCulture); + object obj = ResourceManager.GetObject("g2p-man", resourceCulture); return ((byte[])(obj)); } } @@ -153,9 +163,9 @@ internal static byte[] g2p_pt { /// /// Looks up a localized resource of type System.Byte[]. /// - internal static byte[] g2p_ru { + internal static byte[] g2p_pt { get { - object obj = ResourceManager.GetObject("g2p-ru", resourceCulture); + object obj = ResourceManager.GetObject("g2p-pt", resourceCulture); return ((byte[])(obj)); } } @@ -163,9 +173,9 @@ internal static byte[] g2p_ru { /// /// Looks up a localized resource of type System.Byte[]. /// - internal static byte[] g2p_ko { + internal static byte[] g2p_ru { get { - object obj = ResourceManager.GetObject("g2p-ko", resourceCulture); + object obj = ResourceManager.GetObject("g2p-ru", resourceCulture); return ((byte[])(obj)); } } diff --git a/OpenUtau.Core/G2p/Data/Resources.resx b/OpenUtau.Core/G2p/Data/Resources.resx index fe1a0ffa5..c21c5752d 100644 --- a/OpenUtau.Core/G2p/Data/Resources.resx +++ b/OpenUtau.Core/G2p/Data/Resources.resx @@ -133,6 +133,9 @@ g2p-it.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + g2p-ja-mono.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + g2p-jyutping.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 diff --git a/OpenUtau.Core/G2p/Data/g2p-ja-mono.zip b/OpenUtau.Core/G2p/Data/g2p-ja-mono.zip new file mode 100644 index 0000000000000000000000000000000000000000..09070157bd4dd517a588b6f684505f30ef15c177 GIT binary patch literal 11025 zcmbVy1z1#F*EZeVDczt*36et&T{D0~Bi-HI-5@OvHH?HHh?KMlBHc(yNS7cX$T#@t z^FGh@{{R17*FRi`v-Z8$eXq6mIcLtEb&i%QDjFFQ5)u|tVspI#ErXFfj0g!SMi~hS z4}sd+xtrUVJDcD4^z{Ty&FPSulcB8yybO%_CjSoAfh0AOIg!FBmpW$o6&5#64R3wK zUc%saem`RI#ji4cSwF2?Y;bmJ^E7xK?fPAnPWU_N?f%rrwOU+$WR1GGZ1-C6+~>Wc z!6z|Rudiv7!7-&jFp#z<^lpu+gNcsl#@ zA$H*>BDCXQaBJ+2@s$%8fRL)3B!@cd>8}swIu$}bQb0w-f%8>)XAZA_9%X-uX>wev zp=bp{_Hw5hXO+gkASox90wHwFc5aq>Q(wrG6L4Px_=4B+C}t37{+Yd7bZH-3Iz>oo z!&|`kmtIFuf(I*KO=`nMLTMjgI)#rl3*3dH+F*ALwK9FsD9*w|@*Za|YE(bw!E3#* z%d&sbDjfCZN>#W$*y2A4iUj;IJw%lLm;oY6v%ehRT^XI5RQbOmALrJJ^V!PAcqUVM zx0i9(u)&{XWmhtpCvAN*+-+goBR#|3&V;uN!%Swxgm+ph6iWm1XubvL7V3>jb_pw? z+&gGMzOnjNsQ1w^hog`imdB%wS|if8h~h(j01JmVlL)Wa>wb1}eYDi8@t(zGn?=(fRn9@n&N6?%1?!MEKm$b>XrZ|N(A zM9dSCqqXJrc_0@c@-sZ!ZYLHu$_v)g1Roc&pC zqgh4$JY_YODf$iLAx>SV&;4xt2Q}#~zb>YJ(_LQsn)V^m4$3q{)T3QMS#kwPJmGy3 zcM?vuq402!VZb_5IzjvAeZ@;sG3t#67MUTj+9pi>C2dS1CG9Lq0a<$~-#2elmZlat z57W*#8Dh1i$Vvj`^-2N(1d5jspOQee?~0c?i6==^8>H?+gZdVkE`6NKX)GqgrZ+4- zCf@VWP)!89Md@fgi|czzf%_m2 z!inxxZqRlF)eO|aEi1lHn()kIGK|3Pb!qBw_jFRzH8SIG_l2`(a$h!5M;@H7@P$ZV z*}ov0w6Zvk>Kjier%(YDa#5H*q4OP^l;wg6_I*k3tD{us%M5?4%UfeW=l2&}H%nJ9 zdJL^Zgtr-Ij)4d@T|h!&3B9#k;wyE_aWr%Nb=4OA2Kz-}BfXRP6`uZbSG=uma+Hs= zOZ7Y--Zkhn^gb8>ru9X$(x0?{AI`(R>>1U=ySZ_RP2j4Gzu>DOZeoum?N)9$AwX*C zvMUtNVHn>ESxgFSBbPe?K1sD1~(AkTx^OcVhA+EoCr0P>0}y%|=e3ezLm=u9f=tL2N&0f%uK3a59A1eV7Uic)TcQ3;n}02w&+R4n?!lBzV-4wVxBnU0H0AuoOZy% z6N2myMNu5Ms{`pE^xh1s>QzP4V8@Hyl&CKY(gH(H)XAK9CAH}sgW{3fIWMTJJavo| zQr>*x$0BhjV3S|}@PWEE8-Ba6tGn)AeGsx=lX=iC7+w~t^NLOj>k8SaOQjPnE-dv! zOX|>5rWbrnl~bJ$bsS`MXk~TsWpx&0bu>}LOT8o2AtSvFCRlduB;)=xqM8nmTni>W zjM>g~fIE$;llh%bbOORkf~!!CzkATf$F)ZA`-rKzk=NL1-Nbd3xe z9OWCL^eaAAFD6zmYEk7Kl^orvlEgAsrl(9*0&k)c!aT&leAv0h6z(1mT%+@U$W9v* z3$$`uE{3H}`mf080v~ z%)EbcE>lD~x>B;-B1Yy8e=#52)mD)7tmm`IL1g8ugO)J7LFLIq-$AoIAdChKVdEk! zdPwzy0i>iI+4igFKrJ2--yNfB?I9Ewgy6LNmnS=dO zcrs^-06rnV4}JeIAV+(-;dWV_BZsSBQ(OI|;AHF+k+73jRGvIIXi|+c`25oY=a1}G zy0fR2rw__9lyXG`bw?bOI^>xp{hIWiH++|Szts0^REx<2d}Mg5IZ{~T-@2%mXEYvWYV=qAEok+XBb z8=l)wXJK8d4*uBtqc7x58NX7ZFqODP)oR_-_cQ{Jy`$rva?nO)-A-2p-fz>BqJ?a0 zDJ3)|`^<9~o_@-5Xvmf&J$`1(BCkr+4>|!~PX;}>oo%O@9Bd#1Swjf81p)syzq%=!AFt=gg^z;5oMxYL?6vpC zH)^mGr;5cyIU0Gsv*@dBQ^5i8^Ut-So|3zM&(t}JBB+D=cJ49iHR;tsg{#)aQk?Nz zsx1^8s1|L;lJ={?Rkrvgtx7(#Jv#K2KquDtAKHoNHNVh+)OVm0Ri=*2iih!aJ&p0? z)#jclCA7LmFAc_M6wg^(>fZkaYL?JO{01dj>IQy+^wO7ONl$(W?OqcK>&?5>a(I0TVM3#;kh(ya>QB#Ig?U zzYJ!VH2LXvc=Vu2FzL+T%&*Ah)yA_xwHE?oL9l#V(v~9l0(7GvdWN}@1zIMqK8&A! z9?Dbs)2QL6PQyJhQ!_eOs_2Ex+5Lup5G5$tWJzC$Jhlt1cQF zK-IEM7Ib{ST_D~~}0@~WwNHhTIfB}HuO6K7=(L`M>T`l)fJkbqW zse^U0NwQ2jAykdaVM1JyfGO4Q)fhEsFI=7gl!tlKC^a$%t zu4Q_6_ln~3;`L%`*TJSZqZKX~7pIslM#0GqGo~MoJ&p=N9&$zC+J~T`zbOFsnm?3y z^}&evAI|9uLg?)P`@K=Kpz`8Jz$7`Ey^mz47(#&|ap<}2GvfUwX1vXjyH`5jO%9E` zECzB?wr=U&T317Mg?5*I|dzYk*)#qHQcZ49V} zu6Xes%mpR1Y)>qH^<0k&V3)marx^0<>iBD z^m;3kA`|Eu9)!#Y-w{^|hYm-Iz=$M}T$Ch`UXlMcZ-z%24l%gze8D=RZ2=-7u zZglR3XLN?G_^&j|l)rNCk*iW};1gksUiy&T8-DTTqKb0Q=#`m%+t)4Bxa7VUy@`Dw zsEZr(SQe)~D+Y1i=3p(Ug&frBSkXc#Q#Ac89)o6IZM5Jn`b;z4VIzyEm8eVUP=BOq zpa$0eXHuJk?s`NHBu)wHaW5X#v(+)a|R*NJ?JxruAA~_;LpeOW5=o*|4?EB(GGhw518SOg|7loPHLuN$bf2 zvmM!uMyhP7=@x7pu`zqlDHEBhfo;-FTsYtuo=3jdJNd$V-$8leIkWTXM2g0y4!|Yy2*{$;j-FF9_59fOk{RE-Aw(_AkU_4 zFd3U^fs-cQ`p3SXW+DSk=j64QS9w^OW8K*nPPr*u@OYsz!&&YZ9;vcl(9Zhy0e0{b z+rwp`G>f(}8HF%&S&nZhYC4P+CxIqSXGmsTP#HuLrH};VDrerVXXTFThAP4)<5avf zRfYe^iz#z({2`YODH#suTtNHs4FX5?P(IGFSpZjxSP9Szx!N2w;VZSvX2W%r?_;aQ8?ZLz&4`9{$w z|Is2Gi1E5mf)@DCeyMLc=2Fok`7=k^muSHd>DR@66PT>0p8lkNqr;WKFs+q2t+#TY z=8nHh&s^pOFlP0*RE>$syv=GM@-t z7%IXSfdW<3Pz3$I@Eeut9&Y?l$K*Nru@h1YTI4FLidic1cpCEacyz(1M&c@9#Yo^P zV9Utv)Mv%W>h%0WGnYnDB)QE~dH)idoY!%}*#YGR#v`QyAk` zP>=eCpE}Y-71L$PpD?O@2LZL1R()1+-j^Tq{>Gfm1o~JxG1<~2url&zA)*tVuvYY`nfcHT0pwQF>MQ7$-tQ+D? zmJ8xcW-{cQ=D@9uVM!0=$OkM`38M@Ev2X?emn?B8pC4evrO62t>R*dzG2=?iI2HnrfInm~t4G0vcZZ#ItbZ+V<8}Zd` z23ZU$m90dFb47seY~3fWx?S9V+p1mcf7|+97=PQvW(fee5%MiZ~alg9N*UmW?E6fMlmICST zfZ)zMits{}MHO|ppNX|Z){Z1KBrW$q#R*teE>Z?Be^Z)Q?p;Pxj-kWH*pamr5BNmc!W(V#@h>eBk!WLd+mZR_Y~B~;4da4ItuU%V<@#7VI#?5gKWxFexmGw9ap5KyYU!l{alVM-< zj9RnIUo%?Ivbl;i2#*_u{e>gaAN%D-(sbhqq3wlTWfmp^t`xG3eX zP!$%b(&;*0XUYN0f>!@L`xG-ieN3#-Y3GpR5tuNp#IKy3EC)?ZW`iasYd}4dGH~=V z(IYgXw!5mZeSv-e8!+E;=iL~04~-A^By4vZ<* zDMf~nY-0&oNpe^^JZ3SR5u2NTmtK8HWz_UexBd)&VA==p=ACN&nQ_>{w2z!d^-*#+ zPDe)d5yu=(ZP#l(U)-XGcP?$tLDCiziNt?Tf*hSm~^7osG6-dtB% z7jykY6Yw2pU}T>>ol?vtNY!%J5JS^&8E5Xw1QE`({;ZwZaG6`Q@g08snNtKu)gfQ4 z;WB3ZS#{XU=_chj?`AA_1C-vpOF{53A62^`Lb)I!7C8nz|1jNz>TIxlf9^^uqxukc z?&>=NAv}OABhQ`0Smr-G|0jp#t~lz5!7{pmenI_ZCaQR-BZ=c4XQG_rZ1Lzu}EQ z&ULx6Kv;wyysOHq$_}+8prfTR_b)mFsLA^EU-X`xY4_>J(+y!`K~ENGMj-upBK$4+ zQ`I+jq>{w@r2cSh;|6s42`TfKj|O9HcNL5Vn72@^0=3+HN?qxtg2j0q%RK5{=yxLV z%J$x5hx}QtL`li1qNNJ$%;$LtdeUO-XP^woLTC1TC&hfC+mc41{_8(YqxXJlyOrgQ zZ^HN*!}uDg*V>c{d|=5>O)2@x>Y!0VwXv`5hYPA7Wh5Clusc=RawU=TmHH{{@-oC% z&_5VOw{5{G#Ln~A4zik^w>pTopPr2_Hvy?{de6nrF6iz%y>rZSN!;w5w6L_mlT-3} zSlj%P_Uq-4bKA|=q8=CLet_{uarRT%@89!G*<#ZU8H%53FH-P@5Lbtw5h?{K zHN4}2+~O1^UlER7Js7)sbR;_&5&2zu7E zatZ4ot?r<#9$Be`g%DLEB$~QBv~Tfhc?jBS-^jBr@FC_IbHEQ{T8?^s?Hr~249&;e}NGyh)4ve+O{%8d?dZ`o0ndY=>6>e3bH3)znJdUu)eMtcMgInQ$N^q= zt-`7qMU4&ec}>a$PfRvoI1$gC=bCRq%{xE3oBzs@xBdg1r$Z(ZILXba!cAoSf`4Q* z+mACfFG0@k<79SS?R(~{Kmw}(jq4ESEC2)sO7S_2$5&%5m z;$cBuE@7ly5uL(aYn|YgvV^b-bQg>w@-|9QqBe$E(m*0a5rbTm>@z_d<1AV-`Z>x7 z=D5&cJ5C+XSQFyMARcQnYj%$vqnl}L4zp;eh)zvctXEuEU$p!P;y0va9oxf(9MwdY zNrS;C^tuOQDOv$s&#-&t;sn2#_KieM*bLgokJ`t7#I9IqFd)6=W@uo3OFBz=O(9Aw zfn`K0L12V#(n946IkH}P(iISf+Vvw$vI{eerAt3du4}&&H(Y(i!1cjVAY*l96?Sip z{PGtq3xQP8bOVA4^(^7AyOgZK(`XjP7G`x!W3Oj3sh&70ypd2@FNG#?Q04w>eTvmu1okP-Zec z$=+5KGX$m=o63a$M3fo`jGQ7U=mnNuMVXk4W)9S^TF6vH&4j7)wIPPAt$V8;rZ1`+ zUgIVTcod4Zh$r!~*ia_i85O$)m38ZWYB6Fy(5q3g{i%Ev0h$-AfRQk?MZ!HUTU6jE z3nM@?46PlB$*ul?t|HH0aPue5gN4lGWzB6r|;+yz^arAFC)!LXSOed~ietEgMufW&{41Opd zIgv7CXn2gLqMdF^LcA)(?q@$S)!S`qjw{W3!*ktfnHIuO>+Qy?bB{Ldw$S+Q6215G za3Glfwm}n+aMbai(u9of3j=_sVHFJVlEo-85)uw#?`rGfY~^wHs)c$=7eP2bk^BhT zYKf@t5tRKsAxz7xOr{K7r(;RcXRzq1TwHKb&pzMh=|^Zwm`14J3d&oQd!YfzcAdJH z{H^t{fU*Ve7ZA*h%#B?P8%utyH#>*re3#-y-r`6LCAXJnk+|=Js}XK#o8_B#T)P%^w6B9!A&U*;mZ zx2cTisO*t4ft*sYDF-?n(mKr2cfFU@*X+;1BfUQEY=8B0OU0QM-0ZA##5{j&EcBi| zvJ>BB@yI#CtTU2`s+y)cQ4jMzE3hjRn~BJ$ukdZF%(eD4)4_XkQd!1(W7`Fg0O**SUpe{FgsV7Yxbzp}9l}W@F4 z`Cn)e-8~N34?sk>F zpRe8mIy0IBO#-m*^`YX~9_d~qSBRR`z7OM3eUimlsTSojefDE2tb?)Bh+p!AvLAycZ8~v@_dptKOZ-*~(u^mNOB z-xNnV**@*qde%Og5CqlqR`PiR%`zU1k{pd#98KD{wb4AabUC%w$As&Wq-Szjsw{SX zk|Gn$+#&F)5Gt)C;jSd6sid&4q|U2sdC|g!t!z)-kk}HMa8u0SO^KvbUf_9k-e{!s zvAUa|Orc|n6(<&tLQYHY=q%~`@xpwy@U{_c3~i(KS-6(}M-RVj>5Y`YXD*k#&re=o zX9hHXRP{@%IAJ~9FkTwW56pUS$;oiibe*-#>z@J^6xMYFz0#h9)Md}X%t~Oxpbijj zV`3h>oLIbLUdd@vWT%)QJ_zS~TwZ0a#q;nWs0sOl%i3#>^Jav7f1}L#rb+XR`B2)I zDda==jak$M6VBoEoa)}J+n3&Gd`xR^=fd{>WO@z_$5~f1Yw2nay`wLM?XKLH>J~lG zsDeQ=r`6ky(fXg321ZC}Md)(oUR!H^lDb&Od_H8TI%9}yRV7H5b7`|%V0I@keL5{R zH(R&X8;4IzFtj)RSQEv2Xz!)9X`XT7^E+hnu_l4{o~YX|@z$o{jA(jsje^-%tDQs6 z)!!^F`jbgHL>xu=U+eEm42ZgY>8uiie8v7xf*ry?=s5}q*A$C&vfx_Vj<3^|;#I(K z*|LnGXtxZlN)@mcq*+A6z9)_plc4ZJIq4ke7dCf^+nfsnm2ra}YgesLLw~WaU#0UT z{bEmatJH{dgDY{ehB0`WaVqZ|=dJYI)pZbTn5T8(>ld_fzA1<~{?aja{p00yJUI9$ zzaP4zcBa~>^$`}Ar?@{UWnc0}8`CTC>hag%I`f_|uOIHBm)ylh8{E3Nh`5DA$7O)# zL2$VM7$*D3Xaau*RO6misYaf;X_C}hVcws7H)mHJdu>r)qG`SNeoe}WQc%O@{pFJ< zQZcoA{t;zZm)oDalO4!iUXwAMH+uzz!HqWHpt3(edj ze{N7*-Y*QsO$hW@5I0(J#D>K-HxOuQq&<%)QA!! zQ59fxG)9jyVDaOCl6Wk&HoBDq*J^rcg#PrXF$iK8f=w1E!|L@HIdl~Rg zZAoS%r2i}m{-v!uE&0FY{ohCMpV~{D|2m%k5;vT>>ia(?j*eKZAR(b6{>Bgogq!QG F^*@qEJthDE literal 0 HcmV?d00001 diff --git a/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs b/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs new file mode 100644 index 000000000..aa4ef203c --- /dev/null +++ b/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs @@ -0,0 +1,126 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.OnnxRuntime; +using OpenUtau.Api; +using OpenUtau.Core.Util; + +namespace OpenUtau.Core.G2p { + public class JapaneseMonophoneG2p : G2pPack { + private static readonly string[] graphemes = new string[] { + "", "", "", "", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "m", "n", "o", "p", "r", "s", + "t", "u", "v", "w", "y", "z", "あ", "い", "う", "え", + "お", "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "か", "き", "く", + "け", "こ", "さ", "し", "す", "せ", "そ", "ざ", "じ", "ず", + "ぜ", "ぞ", "た", "ち", "つ", "て", "と", "だ", "ぢ", "づ", "で", + "ど", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", + "ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま", + "み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら", + "り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ","゜", + "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ", "ォ", "カ", + "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "ザ", "ジ", + "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ", "ヂ", "ヅ", + "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", + "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ", "ペ", "ポ", + "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ", "ュ", "ョ", + "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ", "息", "吸", + "-", "R" + }; + + private static readonly string[] phonemes = new string[] { + "", "", "", "", "A", "AP", "E", "I", "N", "O", "U", + "SP", "a", "b", "ch", "cl", "d", "dy", "e", "f", "g", "gw", + "gy", "h", "hy", "i", "j", "k", "kw", "ky", "m", "my", "n", + "ng", "ny", "o", "p", "py", "r", "ry", "s", "sh", "t", "ts", + "ty", "u", "v", "w", "y", "z" + }; + + private static object lockObj = new object(); + private static Dictionary graphemeIndexes; + private static IG2p hiragana; + private static IG2p katakana; + private static IG2p romaji; + private static IG2p special; + private static InferenceSession session; + private static Dictionary predCache = new Dictionary(); + + protected Tuple LoadPack( + byte[] data, + Func prepGrapheme = null, + Func prepPhoneme = null) { + prepGrapheme = prepGrapheme ?? ((string s) => s); + prepPhoneme = prepPhoneme ?? ((string s) => s); + string[] hiraganaTxt = Zip.ExtractText(data, "hiragana.txt"); + string[] katakanaTxt = Zip.ExtractText(data, "katakana.txt"); + string[] romajiTxt = Zip.ExtractText(data, "romaji.txt"); + string[] specialTxt = Zip.ExtractText(data, "special.txt"); + string[] phonesTxt = Zip.ExtractText(data, "phones.txt"); + var builder = G2pDictionary.NewBuilder(); + phonesTxt.Select(line => line.Trim()) + .Select(line => line.Split()) + .Where(parts => parts.Length == 2) + .ToList() + .ForEach(parts => builder.AddSymbol(prepPhoneme(parts[0]), parts[1])); + hiraganaTxt.Where(line => !line.StartsWith(";;;")) + .Select(line => line.Trim()) + .Select(line => line.Split(new string[] { " " }, StringSplitOptions.None)) + .Where(parts => parts.Length == 2) + .ToList() + .ForEach(parts => builder.AddEntry( + prepGrapheme(parts[0]), + parts[1].Split().Select(symbol => prepPhoneme(symbol)))); + katakanaTxt.Where(line => !line.StartsWith(";;;")) + .Select(line => line.Trim()) + .Select(line => line.Split(new string[] { " " }, StringSplitOptions.None)) + .Where(parts => parts.Length == 2) + .ToList() + .ForEach(parts => builder.AddEntry( + prepGrapheme(parts[0]), + parts[1].Split().Select(symbol => prepPhoneme(symbol)))); + romajiTxt.Where(line => !line.StartsWith(";;;")) + .Select(line => line.Trim()) + .Select(line => line.Split(new string[] { " " }, StringSplitOptions.None)) + .Where(parts => parts.Length == 2) + .ToList() + .ForEach(parts => builder.AddEntry( + prepGrapheme(parts[0]), + parts[1].Split().Select(symbol => prepPhoneme(symbol)))); + specialTxt.Where(line => !line.StartsWith(";;;")) + .Select(line => line.Trim()) + .Select(line => line.Split(new string[] { " " }, StringSplitOptions.None)) + .Where(parts => parts.Length == 2) + .ToList() + .ForEach(parts => builder.AddEntry( + prepGrapheme(parts[0]), + parts[1].Split().Select(symbol => prepPhoneme(symbol)))); + var dict = builder.Build(); + return Tuple.Create((IG2p) dict, session); + } + + public JapaneseMonophoneG2p() { + lock (lockObj) { + if (graphemeIndexes == null) { + graphemeIndexes = graphemes + .Skip(4) + .Select((g, i) => Tuple.Create(g, i)) + .ToDictionary(t => t.Item1, t => t.Item2 + 4); + var tuple = LoadPack(Data.Resources.g2p_ja_mono); + hiragana = tuple.Item1; + katakana = tuple.Item1; + romaji = tuple.Item1; + special = tuple.Item1; + session = tuple.Item2; + } + } + GraphemeIndexes = graphemeIndexes; + Phonemes = phonemes; + Session = session; + Dict = hiragana; + Dict = katakana; + Dict = romaji; + Dict = special; + PredCache = predCache; + } + } +} From 6a7d0ad405659b2a19f7a95f7bd3a23b587e5d00 Mon Sep 17 00:00:00 2001 From: Lotte V Date: Sun, 19 May 2024 00:45:26 +0200 Subject: [PATCH 2/5] Add workaround for dash notes (the G2P doesn't recognize it for some reason) --- .../DiffSingerJapanesePhonemizer.cs | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs index c84637a50..0681d2db1 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs @@ -1,5 +1,6 @@ +using System; using System.Collections.Generic; - +using System.Linq; using OpenUtau.Api; using OpenUtau.Core.G2p; @@ -18,5 +19,22 @@ public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer { "kw", "ky", "m", "my", "n", "ny", "p", "py", "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z" }; + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) { + if (notes[0].lyric == "-") { + return MakeSimpleResult("SP"); + } + if (!partResult.TryGetValue(notes[0].position, out var phonemes)) { + throw new Exception("Part result not found"); + } + return new Result { + phonemes = phonemes + .Select((tu) => new Phoneme() { + phoneme = tu.Item1, + position = tu.Item2, + }) + .ToArray(), + }; + } } } From 28ca7f34abedf8976da558a89c52fa797ecc47bc Mon Sep 17 00:00:00 2001 From: Lotte V Date: Sun, 19 May 2024 00:48:45 +0200 Subject: [PATCH 3/5] Add some things I forgot --- OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs b/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs index aa4ef203c..d77b306c4 100644 --- a/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs +++ b/OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs @@ -18,14 +18,14 @@ public class JapaneseMonophoneG2p : G2pPack { "ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら", "り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ","゜", - "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ", "ォ", "カ", - "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "ザ", "ジ", - "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ", "ヂ", "ヅ", - "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", - "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ", "ペ", "ポ", - "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ", "ュ", "ョ", - "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ", "息", "吸", - "-", "R" + "ゐ", "ゑ", "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ", + "ォ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", + "ザ", "ジ", "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ", + "ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", + "フ", "ヘ", "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ", + "ペ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ", + "ュ", "ョ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ", + "ヰ", "ヱ", "息", "吸", "-", "R" }; private static readonly string[] phonemes = new string[] { From 1c30ebcd6f3d0a495147891e067ffa30c6dcc8ff Mon Sep 17 00:00:00 2001 From: Lotte V Date: Sun, 19 May 2024 00:58:10 +0200 Subject: [PATCH 4/5] Add some more stuff I forgot --- .../DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs index 0681d2db1..2b7d1c2b3 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs @@ -16,8 +16,8 @@ public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer { protected override string[] GetBaseG2pConsonants() => new string[] { "b", "by", "d", "dy", "f", "g", "gw", "gy", "h", "hy", "j", "k", - "kw", "ky", "m", "my", "n", "ny", "p", "py", "r", "ry", "s", "sh", - "t", "ts", "ty", "v", "w", "y", "z" + "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py", "r", "ry", + "s", "sh", "t", "ts", "ty", "v", "w", "y", "z" }; public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) { From 3c63ca745997bbd85fd5fe8f1741c7220510b0d2 Mon Sep 17 00:00:00 2001 From: Lotte V Date: Sun, 19 May 2024 00:59:29 +0200 Subject: [PATCH 5/5] Add yet even more stuff I forgot --- .../DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs index 2b7d1c2b3..6e7d8ebf4 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs @@ -15,9 +15,9 @@ public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer { }; protected override string[] GetBaseG2pConsonants() => new string[] { - "b", "by", "d", "dy", "f", "g", "gw", "gy", "h", "hy", "j", "k", - "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py", "r", "ry", - "s", "sh", "t", "ts", "ty", "v", "w", "y", "z" + "b", "by", "ch", "cl", "d", "dy", "f", "g", "gw", "gy", "h", "hy", + "j", "k", "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py", + "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z" }; public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {