Skip to content

Commit

Permalink
Merge branch 'stakira:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
oxygen-dioxide authored Apr 21, 2024
2 parents f13ca18 + 893204e commit 255cb56
Show file tree
Hide file tree
Showing 65 changed files with 1,214 additions and 550 deletions.
1 change: 1 addition & 0 deletions OpenUtau.Core/Classic/ClassicSinger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class ClassicSinger : USinger {
OtoWatcher otoWatcher;

public bool? UseFilenameAsAlias { get => voicebank.UseFilenameAsAlias; set => voicebank.UseFilenameAsAlias = value; }
public Dictionary<string, Frq> Frqs { get; set; } = new Dictionary<string, Frq>();

public ClassicSinger(Voicebank voicebank) {
this.voicebank = voicebank;
Expand Down
93 changes: 92 additions & 1 deletion OpenUtau.Core/Classic/Frq.cs
Original file line number Diff line number Diff line change
@@ -1,17 +1,107 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using NAudio.Wave;
using OpenUtau.Core;
using OpenUtau.Core.Ustx;

namespace OpenUtau.Classic {
/// <summary>
/// Per-oto pitch deviation data derived from the frq file of the oto's wav.
/// The f0 curve is gap-filled, converted to tones, and expressed relative to the
/// frq's average tone, then split at the oto's consonant boundary into a fixed
/// part and a stretchable part.
/// </summary>
public class OtoFrq {
    // Sample rate assumed by every ms-to-frame conversion below; wavs with any
    // other sample rate are marked unusable (wavSampleLength = -1).
    private const int kSampleRate = 44100;
    // f0 values at or below this are treated as missing and get filled in by Completion.
    private const double kMinValidFrq = 60;

    /// <summary>Tone offsets from the average tone, one per frq frame, from oto offset to consonant.</summary>
    public double[] toneDiffFix = new double[0];
    /// <summary>Tone offsets from the average tone, one per frq frame, from consonant to cutoff.</summary>
    public double[] toneDiffStretch = new double[0];
    /// <summary>Hop size copied from the frq; left at 0 when no usable frq is available.</summary>
    public int hopSize;
    /// <summary>True only when both tone arrays were computed.</summary>
    public bool loaded = false;

    /// <summary>
    /// Builds the pitch deviation arrays for <paramref name="oto"/>.
    /// </summary>
    /// <param name="oto">Oto entry whose File, Offset, Consonant and Cutoff are read.</param>
    /// <param name="dict">
    /// Cache of frq data keyed by wav path. A successfully loaded frq is added to it,
    /// and the cached frq's wavSampleLength is filled in (or set to -1) on first use.
    /// </param>
    public OtoFrq(UOto oto, Dictionary<string, Frq> dict) {
        if (!dict.TryGetValue(oto.File, out var frq)) {
            frq = new Frq();
            if (frq.Load(oto.File)) {
                dict.Add(oto.File, frq);
            } else {
                frq = null;
            }
        }
        // -1 marks a frq whose wav was already found to be unusable.
        if (frq == null || frq.wavSampleLength == -1) {
            return;
        }
        this.hopSize = frq.hopSize;

        // 0 means the wav length has not been measured yet for this frq.
        if (frq.wavSampleLength == 0) {
            try {
                using (var waveStream = Core.Format.Wave.OpenFile(oto.File)) {
                    var sampleProvider = waveStream.ToSampleProvider();
                    if (sampleProvider.WaveFormat.SampleRate == kSampleRate) {
                        frq.wavSampleLength = Core.Format.Wave.GetSamples(sampleProvider).Length;
                    } else {
                        frq.wavSampleLength = -1;
                    }
                }
            } catch {
                // Best effort: an unreadable wav only disables frq data for this file.
                frq.wavSampleLength = -1;
            }
        }

        if (frq.wavSampleLength <= 0) {
            return;
        }

        // All three positions are in frq frames (one frame per hopSize wav samples).
        int offset = MsToFrame(oto.Offset, frq.hopSize);
        int consonant = MsToFrame(oto.Offset + oto.Consonant, frq.hopSize);
        // A negative cutoff is a length measured from the offset; otherwise it is
        // measured back from the end of the wav. wavSampleLength is in wav samples,
        // so the end position has to be converted to frames as a whole. Subtracting
        // a frame count from a sample count (as before) gave a far too large index
        // and the cutoff was effectively ignored.
        int cutoff = oto.Cutoff < 0
            ? MsToFrame(oto.Offset - oto.Cutoff, frq.hopSize)
            : (int)Math.Floor((frq.wavSampleLength - oto.Cutoff * kSampleRate / 1000) / frq.hopSize);

        var completionF0 = Completion(frq.f0);
        var averageTone = MusicMath.FreqToTone(frq.averageF0);
        // Skip/Take clamp to the available frames, so out-of-range oto values yield
        // shorter (possibly empty) arrays rather than exceptions.
        toneDiffFix = completionF0
            .Skip(offset)
            .Take(consonant - offset)
            .Select(f => MusicMath.FreqToTone(f) - averageTone)
            .ToArray();
        toneDiffStretch = completionF0
            .Skip(consonant)
            .Take(cutoff - consonant)
            .Select(f => MusicMath.FreqToTone(f) - averageTone)
            .ToArray();

        loaded = true;
    }

    // Converts a position in milliseconds to a frq frame index.
    private static int MsToFrame(double ms, int hop) {
        return (int)Math.Floor(ms * kSampleRate / 1000 / hop);
    }

    /// <summary>
    /// Returns a copy of <paramref name="frqs"/> in which every value at or below
    /// the validity threshold is replaced: by linear interpolation between the
    /// nearest valid neighbours on both sides, or by the single valid neighbour
    /// when only one side has one. When no valid value exists at all the entry
    /// becomes 0.
    /// </summary>
    private double[] Completion(double[] frqs) {
        var list = new List<double>(frqs.Length);
        for (int i = 0; i < frqs.Length; i++) {
            if (frqs[i] > kMinValidFrq) {
                list.Add(frqs[i]);
                continue;
            }
            // Nearest valid value to the left, if any.
            int min = i - 1;
            double minFrq = 0;
            while (min >= 0) {
                if (frqs[min] > kMinValidFrq) {
                    minFrq = frqs[min];
                    break;
                }
                min--;
            }
            // Nearest valid value to the right, if any.
            int max = i + 1;
            double maxFrq = 0;
            while (max < frqs.Length) {
                if (frqs[max] > kMinValidFrq) {
                    maxFrq = frqs[max];
                    break;
                }
                max++;
            }
            if (minFrq <= kMinValidFrq) {
                list.Add(maxFrq);
            } else if (maxFrq <= kMinValidFrq) {
                list.Add(minFrq);
            } else {
                list.Add(MusicMath.Linear(min, max, minFrq, maxFrq, i));
            }
        }
        return list.ToArray();
    }
}

public class Frq {
public const int kHopSize = 256;

public int hopSize;
public double averageF0;
public double[] f0 = new double[0];
public double[] amp = new double[0];
public int wavSampleLength = 0;

/// <summary>
/// If the wav path is null (machine learning voicebank), return false.
Expand Down Expand Up @@ -45,7 +135,8 @@ public bool Load(string wavPath) {
}
return true;
} catch (Exception e) {
DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to load frq file", e));
var customEx = new MessageCustomizableException("Failed to load frq file", "<translate:errors.failed.load>: frq file", e);
DocManager.Inst.ExecuteCmd(new ErrorMessageNotification(customEx));
return false;
}
}
Expand Down
12 changes: 11 additions & 1 deletion OpenUtau.Core/Commands/Notifications.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ public ErrorMessageNotification(string message, Exception e) {
this.message = message;
this.e = e;
}
public override string ToString() => $"Error message: {message} {e}";
/// <summary>
/// Formats the notification as "Error message: {text} {exception}".
/// For a <c>MessageCustomizableException</c> the text is its own message, or the
/// wrapped exception's message when its own is blank, and the exception printed
/// is the wrapped one. For any other exception the notification's message and
/// the exception itself are printed.
/// </summary>
public override string ToString() {
    if (!(e is MessageCustomizableException custom)) {
        return $"Error message: {message} {e}";
    }
    var substance = custom.SubstanceException;
    var headline = string.IsNullOrWhiteSpace(custom.Message)
        ? substance.Message
        : custom.Message;
    return $"Error message: {headline} {substance}";
}
}

public class LoadingNotification : UNotification {
Expand Down
30 changes: 26 additions & 4 deletions OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using K4os.Hash.xxHash;
using Serilog;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

using OpenUtau.Api;
using OpenUtau.Core.Ustx;
using OpenUtau.Core.Util;

namespace OpenUtau.Core.DiffSinger
{
Expand All @@ -17,6 +19,8 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer
DsConfig dsConfig;
string rootPath;
float frameMs;
ulong linguisticHash;
ulong durationHash;
InferenceSession linguisticModel;
InferenceSession durationModel;
IG2p g2p;
Expand Down Expand Up @@ -51,14 +55,18 @@ public override void SetSinger(USinger singer) {
//Load models
var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic);
try {
linguisticModel = new InferenceSession(linguisticModelPath);
var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath);
linguisticHash = XXH64.DigestOf(linguisticModelBytes);
linguisticModel = new InferenceSession(linguisticModelBytes);
} catch (Exception e) {
Log.Error(e, $"failed to load linguistic model from {linguisticModelPath}");
return;
}
var durationModelPath = Path.Join(rootPath, dsConfig.dur);
try {
durationModel = new InferenceSession(durationModelPath);
var durationModelBytes = File.ReadAllBytes(durationModelPath);
durationHash = XXH64.DigestOf(durationModelBytes);
durationModel = new InferenceSession(durationModelBytes);
} catch (Exception e) {
Log.Error(e, $"failed to load duration model from {durationModelPath}");
return;
Expand Down Expand Up @@ -260,7 +268,14 @@ protected override void ProcessPart(Note[][] phrase) {
new DenseTensor<Int64>(word_dur, new int[] { word_dur.Length }, false)
.Reshape(new int[] { 1, word_dur.Length })));
Onnx.VerifyInputNames(linguisticModel, linguisticInputs);
var linguisticOutputs = linguisticModel.Run(linguisticInputs);
var linguisticCache = Preferences.Default.DiffSingerTensorCache
? new DiffSingerCache(linguisticHash, linguisticInputs)
: null;
var linguisticOutputs = linguisticCache?.Load();
if (linguisticOutputs is null) {
linguisticOutputs = linguisticModel.Run(linguisticInputs).Cast<NamedOnnxValue>().ToList();
linguisticCache?.Save(linguisticOutputs);
}
Tensor<float> encoder_out = linguisticOutputs
.Where(o => o.Name == "encoder_out")
.First()
Expand Down Expand Up @@ -291,7 +306,14 @@ protected override void ProcessPart(Note[][] phrase) {
durationInputs.Add(NamedOnnxValue.CreateFromTensor("spk_embed", spkEmbedTensor));
}
Onnx.VerifyInputNames(durationModel, durationInputs);
var durationOutputs = durationModel.Run(durationInputs);
var durationCache = Preferences.Default.DiffSingerTensorCache
? new DiffSingerCache(durationHash, durationInputs)
: null;
var durationOutputs = durationCache?.Load();
if (durationOutputs is null) {
durationOutputs = durationModel.Run(durationInputs).Cast<NamedOnnxValue>().ToList();
durationCache?.Save(durationOutputs);
}
List<double> durationFrames = durationOutputs.First().AsTensor<float>().Select(x=>(double)x).ToList();

//Alignment
Expand Down
Loading

0 comments on commit 255cb56

Please sign in to comment.