Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP]Support ENUNUServer0.4.0 #1282

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions OpenUtau.Core/Enunu/EnunuClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,19 @@
namespace OpenUtau.Core.Enunu {
class EnunuClient : Util.SingletonBase<EnunuClient> {
/// <summary>
/// Sends <paramref name="args"/> to the server on port "15555" by forwarding to the
/// overload that takes an explicit port.
/// </summary>
/// <typeparam name="T">Type the reply is converted to by the forwarded overload.</typeparam>
/// <param name="args">Request arguments passed through unchanged.</param>
/// <returns>Whatever the port-taking overload returns.</returns>
internal T SendRequest<T>(string[] args) => SendRequest<T>(args, "15555");
/// <summary>
/// Serializes <paramref name="args"/> to JSON, sends it over a request socket connected to
/// <c>tcp://localhost:{port}</c>, and deserializes the JSON reply into <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Response type; it is created via <see cref="Activator.CreateInstance(Type)"/> when no reply arrives.</typeparam>
/// <param name="args">Request arguments, serialized with <c>JsonConvert.SerializeObject</c>.</param>
/// <param name="port">Port on localhost that the socket connects to.</param>
/// <param name="second">Number of seconds to wait for the reply frame.</param>
/// <returns>
/// The deserialized reply, or a freshly created <typeparamref name="T"/> when the received
/// message is null or empty (for example, when nothing arrives within the timeout).
/// </returns>
internal T SendRequest<T>(string[] args, string port, int second = 300) {
    using (var client = new RequestSocket()) {
        // Connect to the caller-supplied port only; the socket must not also be
        // attached to a fixed endpoint.
        client.Connect($"tcp://localhost:{port}");
        string request = JsonConvert.SerializeObject(args);
        Log.Information($"EnunuProcess sending {request}");
        client.SendFrame(request);
        // Exactly one receive per request, bounded by the caller-supplied timeout.
        // The bool result is intentionally ignored: a miss leaves message null/empty,
        // which is handled below.
        client.TryReceiveFrameString(TimeSpan.FromSeconds(second), out string? message);
        Log.Information($"EnunuProcess received {message}");
        if (string.IsNullOrEmpty(message)) {
            return (T)Activator.CreateInstance(typeof(T))!;
        }
        return JsonConvert.DeserializeObject<T>(message ?? string.Empty)!;
    }
}
Expand Down
18 changes: 14 additions & 4 deletions OpenUtau.Core/Enunu/EnunuConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@

namespace OpenUtau.Core.Enunu {
class EnunuConfig {
public string tablePath;
public string questionPath;
public string enunu_type = string.Empty;

public string feature_type = string.Empty;
public string tablePath = string.Empty;
public string questionPath = string.Empty;
public int sampleRate;
public double framePeriod;
public EnunuExtensions extensions;
Expand All @@ -16,8 +19,10 @@ public static EnunuConfig Load(USinger singer) {
if (File.Exists(configPath)) {
var configTxt = File.ReadAllText(configPath);
config = Yaml.DefaultDeserializer.Deserialize<RawEnunuConfig>(configTxt);
config.enunu_type = "ENUNU";
} else {
config = SetSimpleENUNUConfig(singer.Location);
config.enunu_type = "SimpleENUNU";
}
return config.Convert();
}
Expand Down Expand Up @@ -70,14 +75,19 @@ class RawEnunuExtensions {
}

class RawEnunuConfig {
public string tablePath;
public string questionPath;
public string enunu_type = string.Empty;
public string feature_type = string.Empty;
public string tablePath = string.Empty;
public string questionPath = string.Empty;
public int sampleRate;
public double framePeriod;
public RawEnunuExtensions extensions;

public EnunuConfig Convert() {
EnunuConfig enunuConfig = new EnunuConfig();
enunuConfig.enunu_type = this.enunu_type;

enunuConfig.feature_type = this.feature_type;
enunuConfig.tablePath = this.tablePath;
enunuConfig.questionPath = this.questionPath;
enunuConfig.sampleRate = this.sampleRate;
Expand Down
3 changes: 3 additions & 0 deletions OpenUtau.Core/Enunu/EnunuEnglishPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ protected IG2p LoadG2p() {
/// <summary>
/// Stores <paramref name="singer"/> as an <c>EnunuSinger</c> (null when it is not one),
/// reloads the G2P via <c>LoadG2p()</c>, and assigns <c>port</c> from
/// <c>EnunuUtils.SetPortNum()</c> if it has not been assigned yet.
/// </summary>
/// <param name="singer">Singer to attach to this phonemizer.</param>
public override void SetSinger(USinger singer) {
    this.singer = singer as EnunuSinger;
    g2p = LoadG2p();
    // Only ask for a port once; later calls keep the existing value.
    port ??= EnunuUtils.SetPortNum();
}

string[] GetSymbols(Note note) {
Expand Down
7 changes: 6 additions & 1 deletion OpenUtau.Core/Enunu/EnunuPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class EnunuPhonemizer : Phonemizer {
readonly string PhonemizerType = "ENUNU";

protected EnunuSinger singer;
protected string port;
Dictionary<Note[], Phoneme[]> partResult = new Dictionary<Note[], Phoneme[]>();

struct TimingResult {
Expand All @@ -27,6 +28,9 @@ struct TimingResponse {

/// <summary>
/// Stores <paramref name="singer"/> as an <c>EnunuSinger</c> (null when it is not one)
/// and assigns <c>port</c> from <c>EnunuUtils.SetPortNum()</c> if it has not been assigned yet.
/// </summary>
/// <param name="singer">Singer to attach to this phonemizer.</param>
public override void SetSinger(USinger singer) {
    this.singer = singer as EnunuSinger;
    // Only ask for a port once; later calls keep the existing value.
    port ??= EnunuUtils.SetPortNum();
}

public override void SetUp(Note[][] notes, UProject project, UTrack track) {
Expand All @@ -42,9 +46,10 @@ public override void SetUp(Note[][] notes, UProject project, UTrack track) {
var scorePath = Path.Join(enutmpPath, $"score.lab");
var timingPath = Path.Join(enutmpPath, $"timing.lab");
var enunuNotes = NoteGroupsToEnunu(notes);
var voicebankNameHash = $"{this.singer.voicebankNameHash:x16}";
if (!File.Exists(scorePath) || !File.Exists(timingPath)) {
EnunuUtils.WriteUst(enunuNotes, bpm, singer, ustPath);
var response = EnunuClient.Inst.SendRequest<TimingResponse>(new string[] { "timing", ustPath });
var response = EnunuClient.Inst.SendRequest<TimingResponse>(new string[] { "timing", ustPath,"", voicebankNameHash, "600" }, port);
if (response.error != null) {
throw new Exception(response.error);
}
Expand Down
164 changes: 103 additions & 61 deletions OpenUtau.Core/Enunu/EnunuRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using K4os.Hash.xxHash;
using NAudio.Wave;
using NumSharp;
using OpenUtau.Core.Format;
Expand All @@ -16,6 +17,8 @@ namespace OpenUtau.Core.Enunu {
public class EnunuRenderer : IRenderer {
public const int headTicks = 240;
public const int tailTicks = 240;
protected string port;
private EnunuConfig config;

static readonly HashSet<string> supportedExp = new HashSet<string>(){
Format.Ustx.DYN,
Expand All @@ -25,13 +28,16 @@ public class EnunuRenderer : IRenderer {
Format.Ustx.BREC,
Format.Ustx.TENC,
Format.Ustx.VOIC,
Format.Ustx.SHFT

};

// Result payload for an "acoustic" request.
// NOTE(review): field names look like they mirror the keys of the server's JSON reply,
// so they presumably must not be renamed — confirm against the server protocol.
struct AcousticResult {
// Presumably the path of the combined acoustic feature output — TODO confirm.
public string path_acoustic;
// Presumably the path of the f0 output — TODO confirm.
public string path_f0;
// Presumably the path of the spectrogram output — TODO confirm.
public string path_spectrogram;
// Presumably the path of the aperiodicity output — TODO confirm.
public string path_aperiodicity;
// Presumably the path of the mel output — TODO confirm.
public string path_mel;
// Presumably the path of the voiced/unvoiced (vuv) output — TODO confirm.
public string path_vuv;
}

struct AcousticResponse {
Expand Down Expand Up @@ -76,75 +82,99 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
}
string progressInfo = $"Track {trackNo + 1}: {this} \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\"";
progress.Complete(0, progressInfo);
var tmpPath = Path.Join(PathManager.Inst.CachePath, $"enu-{phrase.preEffectHash:x16}");
ulong hash = HashPhraseGroups(phrase);
var tmpPath = Path.Join(PathManager.Inst.CachePath, $"enu-{hash:x16}");
var ustPath = tmpPath + ".tmp";
var enutmpPath = tmpPath + "_enutemp";
var wavPath = Path.Join(PathManager.Inst.CachePath, $"enu-{phrase.hash:x16}.wav");
var voicebankNameHash = $"{(phrase.singer as EnunuSinger).voicebankNameHash:x16}";
config = EnunuConfig.Load(phrase.singer);
if (port == null) {
port = EnunuUtils.SetPortNum();
}
var result = Layout(phrase);
if (!File.Exists(wavPath)) {
var config = EnunuConfig.Load(phrase.singer);
if (config.extensions.wav_synthesizer.Contains("synthe")) {
Log.Information($"Starting enunu synthesis \"{ustPath}\"");
var enunuNotes = PhraseToEnunuNotes(phrase);
// TODO: using first note tempo as ust tempo.
EnunuUtils.WriteUst(enunuNotes, phrase.phones.First().tempo, phrase.singer, ustPath);
var response = EnunuClient.Inst.SendRequest<SyntheResponse>(new string[] { "synthe", ustPath, wavPath });
if (response.error != null) {
throw new Exception(response.error);
if (config.extensions.wav_synthesizer.Contains("synthe") || config.feature_type.Equals("melf0")) {
var f0Path = Path.Join(enutmpPath, "f0.npy");
var editorf0Path = Path.Join(enutmpPath, "editorf0.npy");
var melPath = Path.Join(enutmpPath, "mel.npy");
var vuvPath = Path.Join(enutmpPath, "vuv.npy");
if (!File.Exists(f0Path) || !File.Exists(melPath) || !File.Exists(vuvPath)) {
Log.Information($"Starting enunu synthesis \"{ustPath}\"");
var enunuNotes = PhraseToEnunuNotes(phrase);
// TODO: using first note tempo as ust tempo.
EnunuUtils.WriteUst(enunuNotes, phrase.phones.First().tempo, phrase.singer, ustPath);
var ac_response = EnunuClient.Inst.SendRequest<AcousticResponse>(new string[] { "acoustic", ustPath, "", voicebankNameHash, "600", phrase.phones[0].toneShift.ToString() }, port);
if (ac_response.error != null) {
Log.Error(ac_response.error);
}
}
var f0 = np.Load<double[]>(f0Path);
int totalFrames = f0.Length;
var headMs = phrase.positionMs - phrase.timeAxis.TickPosToMsPos(phrase.position - headTicks);
var tailMs = phrase.timeAxis.TickPosToMsPos(phrase.end + tailTicks) - phrase.endMs;
int headFrames = (int)Math.Round(headMs / config.framePeriod);
int tailFrames = (int)Math.Round(tailMs / config.framePeriod);
var editorF0 = SampleCurve(phrase, phrase.pitches, 0, config.framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01));
np.Save(editorF0, editorf0Path);
SyntheResponse sy_response = new SyntheResponse();
sy_response = EnunuClient.Inst.SendRequest<SyntheResponse>(new string[] { "synthe", ustPath, wavPath, voicebankNameHash, "600", phrase.phones[0].toneShift.ToString() }, port);
if (sy_response.error != null) {
throw new Exception(sy_response.error);
}
} else {
var f0Path = Path.Join(enutmpPath, "f0.npy");
var spPath = Path.Join(enutmpPath, "spectrogram.npy");
var apPath = Path.Join(enutmpPath, "aperiodicity.npy");
if (!File.Exists(f0Path) || !File.Exists(spPath) || !File.Exists(apPath)) {
Log.Information($"Starting enunu acoustic \"{ustPath}\"");
var enunuNotes = PhraseToEnunuNotes(phrase);
// TODO: using first note tempo as ust tempo.
EnunuUtils.WriteUst(enunuNotes, phrase.phones.First().tempo, phrase.singer, ustPath);
var response = EnunuClient.Inst.SendRequest<AcousticResponse>(new string[] { "acoustic", ustPath });
if (response.error != null) {
throw new Exception(response.error);
var f0Path = Path.Join(enutmpPath, "f0.npy");
var spPath = Path.Join(enutmpPath, "spectrogram.npy");
var apPath = Path.Join(enutmpPath, "aperiodicity.npy");
if (!File.Exists(f0Path) || !File.Exists(spPath) || !File.Exists(apPath)) {
Log.Information($"Starting enunu acoustic \"{ustPath}\"");
var enunuNotes = PhraseToEnunuNotes(phrase);
// TODO: using first note tempo as ust tempo.
EnunuUtils.WriteUst(enunuNotes, phrase.phones.First().tempo, phrase.singer, ustPath);
var ac_response = EnunuClient.Inst.SendRequest<AcousticResponse>(new string[] { "acoustic", ustPath, "", voicebankNameHash, "600", phrase.phones[0].toneShift.ToString() }, port);
if (ac_response.error != null) {
throw new Exception(ac_response.error);
}
}
}
if (cancellation.IsCancellationRequested) {
return new RenderResult();
}
var f0 = np.Load<double[]>(f0Path);
var sp = np.Load<double[,]>(spPath);
var ap = np.Load<double[,]>(apPath);
int totalFrames = f0.Length;
var headMs = phrase.positionMs - phrase.timeAxis.TickPosToMsPos(phrase.position - headTicks);
var tailMs = phrase.timeAxis.TickPosToMsPos(phrase.end + tailTicks) - phrase.endMs;
int headFrames = (int)Math.Round(headMs / config.framePeriod);
int tailFrames = (int)Math.Round(tailMs / config.framePeriod);
var editorF0 = SampleCurve(phrase, phrase.pitches, 0, config.framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01));
var gender = SampleCurve(phrase, phrase.gender, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var tension = SampleCurve(phrase, phrase.tension, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var breathiness = SampleCurve(phrase, phrase.breathiness, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var voicing = SampleCurve(phrase, phrase.voicing, 1.0, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.01 * x);
int fftSize = (sp.GetLength(1) - 1) * 2;
for (int i = 0; i < f0.Length; i++) {
if (f0[i] < 50) {
editorF0[i] = 0;
if (cancellation.IsCancellationRequested) {
return new RenderResult();
}
var f0 = np.Load<double[]>(f0Path);
var sp = np.Load<double[,]>(spPath);
var ap = np.Load<double[,]>(apPath);
int totalFrames = f0.Length;
var headMs = phrase.positionMs - phrase.timeAxis.TickPosToMsPos(phrase.position - headTicks);
var tailMs = phrase.timeAxis.TickPosToMsPos(phrase.end + tailTicks) - phrase.endMs;
int headFrames = (int)Math.Round(headMs / config.framePeriod);
int tailFrames = (int)Math.Round(tailMs / config.framePeriod);
var editorF0 = SampleCurve(phrase, phrase.pitches, 0, config.framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01));
var gender = SampleCurve(phrase, phrase.gender, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var tension = SampleCurve(phrase, phrase.tension, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var breathiness = SampleCurve(phrase, phrase.breathiness, 0.5, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x);
var voicing = SampleCurve(phrase, phrase.voicing, 1.0, config.framePeriod, totalFrames, headFrames, tailFrames, x => 0.01 * x);
int fftSize = (sp.GetLength(1) - 1) * 2;
for (int i = 0; i < f0.Length; i++) {
if (f0[i] < 50) {
editorF0[i] = 0;
}
}
var samples = Worldline.WorldSynthesis(
editorF0,
sp, false, sp.GetLength(1),
ap, false, fftSize,
config.framePeriod, config.sampleRate,
gender, tension, breathiness, voicing);
result.samples = samples.Select(d => (float)d).ToArray();
Wave.CorrectSampleScale(result.samples);
if (config.sampleRate != 44100) {
var signal = new NWaves.Signals.DiscreteSignal(config.sampleRate, result.samples);
signal = NWaves.Operations.Operation.Resample(signal, 44100);
result.samples = signal.Samples;
}
var source = new WaveSource(0, 0, 0, 1);
source.SetSamples(result.samples);
WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0));
}
var samples = Worldline.WorldSynthesis(
editorF0,
sp, false, sp.GetLength(1),
ap, false, fftSize,
config.framePeriod, config.sampleRate,
gender, tension, breathiness, voicing);
result.samples = samples.Select(d => (float)d).ToArray();
Wave.CorrectSampleScale(result.samples);
if (config.sampleRate != 44100) {
var signal = new NWaves.Signals.DiscreteSignal(config.sampleRate, result.samples);
signal = NWaves.Operations.Operation.Resample(signal, 44100);
result.samples = signal.Samples;
}
var source = new WaveSource(0, 0, 0, 1);
source.SetSamples(result.samples);
WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0));
}
}
progress.Complete(phrase.phones.Length, progressInfo);
if (File.Exists(wavPath)) {
Expand Down Expand Up @@ -184,7 +214,8 @@ double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, do
}

public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) {
var tmpPath = Path.Join(PathManager.Inst.CachePath, $"enu-{phrase.preEffectHash:x16}");
ulong hash = HashPhraseGroups(phrase);
var tmpPath = Path.Join(PathManager.Inst.CachePath, $"enu-{hash:x16}");
var enutmpPath = tmpPath + "_enutemp";
var f0Path = Path.Join(enutmpPath, "f0.npy");
if (!File.Exists(f0Path)) {
Expand Down Expand Up @@ -233,5 +264,16 @@ public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSe
}

/// <summary>Returns the renderer identifier <c>Renderers.ENUNU</c>.</summary>
public override string ToString() => Renderers.ENUNU;


/// <summary>
/// Builds a 64-bit xxHash from the phrase's <c>preEffectHash</c> followed by the
/// <c>toneShift</c> of its first phone, both written in binary form.
/// </summary>
/// <param name="phrase">Phrase to hash; <c>phones[0]</c> is read, so it needs at least one phone.</param>
/// <returns>The XXH64 digest of the written bytes.</returns>
ulong HashPhraseGroups(RenderPhrase phrase) {
    using var buffer = new MemoryStream();
    using var binary = new BinaryWriter(buffer);
    binary.Write(phrase.preEffectHash);
    binary.Write(phrase.phones[0].toneShift);
    byte[] payload = buffer.ToArray();
    return XXH64.DigestOf(payload);
}
}
}
Loading
Loading