Merge branch 'stakira:master' into master

xunmengshe · Apr 21, 2024 · 255cb56 · 255cb56
2 parents f13ca18 + 893204e
commit 255cb56
Show file tree

Hide file tree

Showing 65 changed files with 1,214 additions and 550 deletions.
diff --git a/OpenUtau.Core/Classic/ClassicSinger.cs b/OpenUtau.Core/Classic/ClassicSinger.cs
@@ -44,6 +44,7 @@ public class ClassicSinger : USinger {
         OtoWatcher otoWatcher;
 
         public bool? UseFilenameAsAlias { get => voicebank.UseFilenameAsAlias; set => voicebank.UseFilenameAsAlias = value; }
+        public Dictionary<string, Frq> Frqs { get; set; } = new Dictionary<string, Frq>();
 
         public ClassicSinger(Voicebank voicebank) {
             this.voicebank = voicebank;

diff --git a/OpenUtau.Core/Classic/Frq.cs b/OpenUtau.Core/Classic/Frq.cs
@@ -1,17 +1,107 @@
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Text;
+using NAudio.Wave;
 using OpenUtau.Core;
+using OpenUtau.Core.Ustx;
 
 namespace OpenUtau.Classic {
+    /// <summary>
+    /// Pitch deviation of one oto, in tones relative to the frq's average F0, split into a fixed part
+    /// (offset..consonant) and a stretched part (consonant..cutoff); loaded stays false if unavailable.
+    /// </summary>
+    public class OtoFrq {
+        public double[] toneDiffFix = new double[0];
+        public double[] toneDiffStretch = new double[0];
+        public int hopSize;
+        public bool loaded = false;
+
+        /// <param name="oto">Oto entry whose File, Offset, Consonant and Cutoff are read.</param>
+        /// <param name="dict">Frq cache keyed by oto.File; a successfully loaded Frq is added to it.</param>
+        public OtoFrq(UOto oto, Dictionary<string, Frq> dict) {
+            if (!dict.TryGetValue(oto.File, out var frq)) {
+                frq = new Frq();
+                if (frq.Load(oto.File)) {
+                    dict.Add(oto.File, frq);
+                } else {
+                    frq = null;
+                }
+            }
+            // wavSampleLength: 0 = not measured yet, -1 = wav unusable, > 0 = measured sample count.
+            if (frq == null || frq.wavSampleLength == -1) {
+                return;
+            }
+            this.hopSize = frq.hopSize;
+            if (frq.wavSampleLength == 0) {
+                try {
+                    using (var waveStream = Core.Format.Wave.OpenFile(oto.File)) {
+                        var sampleProvider = waveStream.ToSampleProvider();
+                        // The frame math below hard-codes 44100 Hz, so any other rate is marked unusable.
+                        frq.wavSampleLength = sampleProvider.WaveFormat.SampleRate == 44100
+                            ? Core.Format.Wave.GetSamples(sampleProvider).Length
+                            : -1;
+                    }
+                } catch {
+                    frq.wavSampleLength = -1; // best effort: an unreadable wav only leaves loaded == false
+                }
+            }
+            if (frq.wavSampleLength <= 0) {
+                return;
+            }
+            int ToFrame(double ms) => (int)Math.Floor(ms * 44100 / 1000 / frq.hopSize); // ms -> frq frame
+            int offset = ToFrame(oto.Offset);
+            int consonant = ToFrame(oto.Offset + oto.Consonant);
+            // Cutoff < 0 counts from the offset; otherwise it counts back from the wav end, with the sample count converted to frq frames.
+            int cutoff = oto.Cutoff < 0
+                ? ToFrame(oto.Offset - oto.Cutoff)
+                : (int)Math.Floor((frq.wavSampleLength - oto.Cutoff * 44100 / 1000) / frq.hopSize);
+            var completionF0 = Completion(frq.f0);
+            var averageTone = MusicMath.FreqToTone(frq.averageF0);
+            toneDiffFix = completionF0.Skip(offset).Take(consonant - offset).Select(f => MusicMath.FreqToTone(f) - averageTone).ToArray();
+            toneDiffStretch = completionF0.Skip(consonant).Take(cutoff - consonant).Select(f => MusicMath.FreqToTone(f) - averageTone).ToArray();
+            loaded = true;
+        }
+
+        /// <summary>
+        /// Copies frqs, replacing each value at or below 60 by a linear interpolation between the
+        /// nearest values above 60 on either side, or by the only such neighbour when one side has
+        /// none (0 when neither side has one).
+        /// </summary>
+        private double[] Completion(double[] frqs) {
+            var list = new List<double>();
+            for (int i = 0; i < frqs.Length; i++) {
+                if (!(frqs[i] <= 60)) { // negated form keeps NaN untouched, like any value above 60
+                    list.Add(frqs[i]);
+                    continue;
+                }
+                int min = i - 1;
+                while (min >= 0 && !(frqs[min] > 60)) {
+                    min--;
+                }
+                int max = i + 1;
+                while (max < frqs.Length && !(frqs[max] > 60)) {
+                    max++;
+                }
+                double minFrq = min >= 0 ? frqs[min] : 0;
+                double maxFrq = max < frqs.Length ? frqs[max] : 0;
+                list.Add(minFrq <= 60 ? maxFrq
+                    : maxFrq <= 60 ? minFrq
+                    : MusicMath.Linear(min, max, minFrq, maxFrq, i));
+            }
+            return list.ToArray();
+        }
+    }
+
     public class Frq {
         public const int kHopSize = 256;
 
         public int hopSize;
         public double averageF0;
         public double[] f0 = new double[0];
         public double[] amp = new double[0];
+        public int wavSampleLength = 0;
 
         /// <summary>
         /// If the wav path is null (machine learning voicebank), return false.
@@ -45,7 +135,8 @@ public bool Load(string wavPath) {
                 }
                 return true;
             } catch (Exception e) {
-                DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to load frq file", e));
+                var customEx = new MessageCustomizableException("Failed to load frq file", "<translate:errors.failed.load>: frq file", e);
+                DocManager.Inst.ExecuteCmd(new ErrorMessageNotification(customEx));
                 return false;
             }
         }

diff --git a/OpenUtau.Core/Commands/Notifications.cs b/OpenUtau.Core/Commands/Notifications.cs
@@ -23,7 +23,17 @@ public ErrorMessageNotification(string message, Exception e) {
             this.message = message;
             this.e = e;
         }
-        public override string ToString() => $"Error message: {message} {e}";
+        /// <summary>
+        /// Builds the text form of this notification. A MessageCustomizableException contributes its
+        /// own message (or its SubstanceException's message when blank) followed by the SubstanceException.
+        /// </summary>
+        public override string ToString() {
+            if (!(e is MessageCustomizableException custom)) {
+                return $"Error message: {message} {e}";
+            }
+            var headline = string.IsNullOrWhiteSpace(custom.Message) ? custom.SubstanceException.Message : custom.Message;
+            return $"Error message: {headline} {custom.SubstanceException}";
+        }
     }
 
     public class LoadingNotification : UNotification {

diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs
@@ -2,12 +2,14 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
+using K4os.Hash.xxHash;
 using Serilog;
 using Microsoft.ML.OnnxRuntime;
 using Microsoft.ML.OnnxRuntime.Tensors;
 
 using OpenUtau.Api;
 using OpenUtau.Core.Ustx;
+using OpenUtau.Core.Util;
 
 namespace OpenUtau.Core.DiffSinger
 {
@@ -17,6 +19,8 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer
         DsConfig dsConfig;
         string rootPath;
         float frameMs;
+        ulong linguisticHash;
+        ulong durationHash;
         InferenceSession linguisticModel;
         InferenceSession durationModel;
         IG2p g2p;
@@ -51,14 +55,18 @@ public override void SetSinger(USinger singer) {
             //Load models
             var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic);
             try {
-                linguisticModel = new InferenceSession(linguisticModelPath);
+                var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath);
+                linguisticHash = XXH64.DigestOf(linguisticModelBytes);
+                linguisticModel = new InferenceSession(linguisticModelBytes);
             } catch (Exception e) {
                 Log.Error(e, $"failed to load linguistic model from {linguisticModelPath}");
                 return;
             }
             var durationModelPath = Path.Join(rootPath, dsConfig.dur);
             try {
-                durationModel = new InferenceSession(durationModelPath);
+                var durationModelBytes = File.ReadAllBytes(durationModelPath);
+                durationHash = XXH64.DigestOf(durationModelBytes);
+                durationModel = new InferenceSession(durationModelBytes);
             } catch (Exception e) {
                 Log.Error(e, $"failed to load duration model from {durationModelPath}");
                 return;
@@ -260,7 +268,14 @@ protected override void ProcessPart(Note[][] phrase) {
                 new DenseTensor<Int64>(word_dur, new int[] { word_dur.Length }, false)
                 .Reshape(new int[] { 1, word_dur.Length })));
             Onnx.VerifyInputNames(linguisticModel, linguisticInputs);
-            var linguisticOutputs = linguisticModel.Run(linguisticInputs);
+            var linguisticCache = Preferences.Default.DiffSingerTensorCache
+                ? new DiffSingerCache(linguisticHash, linguisticInputs)
+                : null;
+            var linguisticOutputs = linguisticCache?.Load();
+            if (linguisticOutputs is null) {
+                linguisticOutputs = linguisticModel.Run(linguisticInputs).Cast<NamedOnnxValue>().ToList();
+                linguisticCache?.Save(linguisticOutputs);
+            }
             Tensor<float> encoder_out = linguisticOutputs
                 .Where(o => o.Name == "encoder_out")
                 .First()
@@ -291,7 +306,14 @@ protected override void ProcessPart(Note[][] phrase) {
                 durationInputs.Add(NamedOnnxValue.CreateFromTensor("spk_embed", spkEmbedTensor));
             }
             Onnx.VerifyInputNames(durationModel, durationInputs);
-            var durationOutputs = durationModel.Run(durationInputs);
+            var durationCache = Preferences.Default.DiffSingerTensorCache
+                ? new DiffSingerCache(durationHash, durationInputs)
+                : null;
+            var durationOutputs = durationCache?.Load();
+            if (durationOutputs is null) {
+                durationOutputs = durationModel.Run(durationInputs).Cast<NamedOnnxValue>().ToList();
+                durationCache?.Save(durationOutputs);
+            }
             List<double> durationFrames = durationOutputs.First().AsTensor<float>().Select(x=>(double)x).ToList();
 
             //Alignment