From 3f1524193702b73dc6f6b847dbdff5dbfcc0d5a7 Mon Sep 17 00:00:00 2001 From: yqzhishen Date: Sat, 16 Mar 2024 14:56:50 +0800 Subject: [PATCH 01/27] Add tensor caching system for DiffSinger --- .../DiffSinger/DiffSingerBasePhonemizer.cs | 30 +- OpenUtau.Core/DiffSinger/DiffSingerCache.cs | 311 ++++++++++++++++++ OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 15 +- .../DiffSinger/DiffSingerRenderer.cs | 39 ++- OpenUtau.Core/DiffSinger/DiffSingerSinger.cs | 7 +- .../DiffSinger/DiffSingerVariance.cs | 30 +- OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs | 3 + OpenUtau.Core/Util/Preferences.cs | 1 + OpenUtau/Strings/Strings.axaml | 1 + OpenUtau/Strings/Strings.zh-CN.axaml | 1 + OpenUtau/ViewModels/PreferencesViewModel.cs | 7 + OpenUtau/Views/PreferencesDialog.axaml | 14 +- 12 files changed, 430 insertions(+), 29 deletions(-) create mode 100644 OpenUtau.Core/DiffSinger/DiffSingerCache.cs diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs index 61af0c478..d955756fb 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs @@ -2,12 +2,14 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using K4os.Hash.xxHash; using Serilog; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using OpenUtau.Api; using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; namespace OpenUtau.Core.DiffSinger { @@ -17,6 +19,8 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer DsConfig dsConfig; string rootPath; float frameMs; + ulong linguisticHash; + ulong durationHash; InferenceSession linguisticModel; InferenceSession durationModel; IG2p g2p; @@ -51,14 +55,18 @@ public override void SetSinger(USinger singer) { //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); try { - linguisticModel = new InferenceSession(linguisticModelPath); + var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath); + linguisticHash = XXH64.DigestOf(linguisticModelBytes); + linguisticModel = new InferenceSession(linguisticModelBytes); } catch (Exception e) { Log.Error(e, $"failed to load linguistic model from {linguisticModelPath}"); return; } var durationModelPath = Path.Join(rootPath, dsConfig.dur); try { - durationModel = new InferenceSession(durationModelPath); + var durationModelBytes = File.ReadAllBytes(durationModelPath); + durationHash = XXH64.DigestOf(durationModelBytes); + durationModel = new InferenceSession(durationModelBytes); } catch (Exception e) { Log.Error(e, $"failed to load duration model from {durationModelPath}"); return; @@ -260,7 +268,14 @@ protected override void ProcessPart(Note[][] phrase) { new DenseTensor(word_dur, new int[] { word_dur.Length }, false) .Reshape(new int[] { 1, word_dur.Length }))); Onnx.VerifyInputNames(linguisticModel, linguisticInputs); - var linguisticOutputs = linguisticModel.Run(linguisticInputs); + var linguisticCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(linguisticHash, linguisticInputs) + : null; + var linguisticOutputs = linguisticCache?.Load(); + if (linguisticOutputs is null) { + linguisticOutputs = linguisticModel.Run(linguisticInputs).Cast().ToList(); + linguisticCache?.Save(linguisticOutputs); + } Tensor encoder_out = linguisticOutputs .Where(o => o.Name == "encoder_out") .First() @@ -291,7 +306,14 @@ protected override void ProcessPart(Note[][] phrase) { durationInputs.Add(NamedOnnxValue.CreateFromTensor("spk_embed", spkEmbedTensor)); } Onnx.VerifyInputNames(durationModel, durationInputs); - var durationOutputs = durationModel.Run(durationInputs); + var durationCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(durationHash, durationInputs) + : null; + var durationOutputs = durationCache?.Load(); + if (durationOutputs is null) { + durationOutputs = durationModel.Run(durationInputs).Cast().ToList(); + durationCache?.Save(durationOutputs); + } List durationFrames = durationOutputs.First().AsTensor().Select(x=>(double)x).ToList(); //Alignment diff --git a/OpenUtau.Core/DiffSinger/DiffSingerCache.cs b/OpenUtau.Core/DiffSinger/DiffSingerCache.cs new file mode 100644 index 000000000..83c30d8e0 --- /dev/null +++ b/OpenUtau.Core/DiffSinger/DiffSingerCache.cs @@ -0,0 +1,311 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using K4os.Hash.xxHash; +using Microsoft.ML.OnnxRuntime; +using Microsoft.ML.OnnxRuntime.Tensors; +using Serilog; + +namespace OpenUtau.Core.DiffSinger { + + public class DiffSingerCache { + private const string FormatHeader = "TENSORCACHE"; + + private readonly ulong hash; + private readonly string filename; + + public ulong Hash => hash; + + public DiffSingerCache(ulong identifier, ICollection inputs) { + using var stream = new MemoryStream(); + using (var writer = new BinaryWriter(stream)) { + writer.Write(identifier); + foreach (var onnxValue in inputs.OrderBy(v => v.Name, StringComparer.InvariantCulture)) { + SerializeNamedOnnxValue(writer, onnxValue); + } + } + + hash = XXH64.DigestOf(stream.ToArray()); + filename = $"ds-{hash:x16}.tensorcache"; + } + + public ICollection? Load() { + var cachePath = Path.Join(PathManager.Inst.CachePath, filename); + if (!File.Exists(cachePath)) return null; + + var result = new List(); + using var stream = new FileStream(cachePath, FileMode.Open, FileAccess.Read); + using var reader = new BinaryReader(stream); + // header + if (reader.ReadString() != FormatHeader) { + throw new InvalidDataException($"[TensorCache] Unexpected file header in {filename}."); + } + try { + // count + var count = reader.ReadInt32(); + for (var i = 0; i < count; ++i) { + // data + result.Add(DeserializeNamedOnnxValue(reader)); + } + } catch (Exception e) { + Log.Error(e, + "[TensorCache] Exception encountered when deserializing cache file. Root exception message: {msg}", e.Message); + Delete(); + return null; + } + + return result; + } + + public void Delete() { + var cachePath = Path.Join(PathManager.Inst.CachePath, filename); + if (File.Exists(cachePath)) { + File.Delete(cachePath); + } + } + + public void Save(ICollection outputs) { + var cachePath = Path.Join(PathManager.Inst.CachePath, filename); + using var stream = new FileStream(cachePath, FileMode.Create, FileAccess.Write); + using var writer = new BinaryWriter(stream); + // header + writer.Write(FormatHeader); + // count + writer.Write(outputs.Count); + foreach (var onnxValue in outputs) { + // data + SerializeNamedOnnxValue(writer, onnxValue); + } + } + + private static void SerializeNamedOnnxValue(BinaryWriter writer, NamedOnnxValue namedOnnxValue) { + if (namedOnnxValue.ValueType != OnnxValueType.ONNX_TYPE_TENSOR) { + throw new NotSupportedException( + $"[TensorCache] The only supported ONNX value type is {OnnxValueType.ONNX_TYPE_TENSOR}. Got {namedOnnxValue.ValueType} instead." + ); + } + // name + writer.Write(namedOnnxValue.Name); + var tensorBase = (TensorBase) namedOnnxValue.Value; + var elementType = tensorBase.GetTypeInfo().ElementType; + // dtype + writer.Write((int)elementType); + switch (elementType) { + case TensorElementType.Float: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.UInt8: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Int8: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.UInt16: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Int16: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Int32: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Int64: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.String: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Bool: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Float16: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Double: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.UInt32: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.UInt64: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.BFloat16: { + var tensor = namedOnnxValue.AsTensor(); + SerializeTensor(writer, tensor); + break; + } + case TensorElementType.Complex64: + case TensorElementType.Complex128: + case TensorElementType.DataTypeMax: + default: + throw new NotSupportedException($"[TensorCache] Unsupported tensor element type: {elementType}."); + } + } + + private static void SerializeTensor(BinaryWriter writer, Tensor tensor) { + if (tensor.IsReversedStride) { + throw new NotSupportedException("[TensorCache] Tensors in reversed strides are not supported."); + } + // rank + writer.Write(tensor.Rank); + // shape + foreach (var dim in tensor.Dimensions) { + writer.Write(dim); + } + // size + var size = (int)tensor.Length; + writer.Write(size); + if (typeof(T) == typeof(string)) { + // string tensor + // data + foreach (var element in tensor.ToArray()) { + writer.Write(element!.ToString()); + } + } else { + // numeric tensor + // data + var data = new byte[size * tensor.GetTypeInfo().TypeSize]; + Buffer.BlockCopy(tensor.ToArray(), 0, data, 0, data.Length); + writer.Write(data); + } + } + + private static NamedOnnxValue DeserializeNamedOnnxValue(BinaryReader reader) { + // name + var name = reader.ReadString(); + // dtype + var dtype = (TensorElementType)reader.ReadInt32(); + // rank + var rank = reader.ReadInt32(); + // shape + int[] shape = new int[rank]; + for (var i = 0; i < rank; ++i) { + shape[i] = reader.ReadInt32(); + } + // size + var size = reader.ReadInt32(); + NamedOnnxValue namedOnnxValue; + switch (dtype) { + case TensorElementType.Float: { + var tensor = DeserializeTensor(reader, size, sizeof(float), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.UInt8: { + var tensor = DeserializeTensor(reader, size, sizeof(byte), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Int8: { + var tensor = DeserializeTensor(reader, size, sizeof(sbyte), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.UInt16: { + var tensor = DeserializeTensor(reader, size, sizeof(ushort), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Int16: { + var tensor = DeserializeTensor(reader, size, sizeof(short), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Int32: { + var tensor = DeserializeTensor(reader, size, sizeof(int), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Int64: { + var tensor = DeserializeTensor(reader, size, sizeof(long), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.String: { + // string tensor + Tensor tensor = new DenseTensor(size); + for (var i = 0; i < size; ++i) { + tensor[i] = reader.ReadString(); + } + tensor = tensor.Reshape(shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Bool: { + var tensor = DeserializeTensor(reader, size, sizeof(bool), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Float16: { + var tensor = DeserializeTensor(reader, size, sizeof(ushort), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Double: { + var tensor = DeserializeTensor(reader, size, sizeof(double), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.UInt32: { + var tensor = DeserializeTensor(reader, size, sizeof(uint), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.UInt64: { + var tensor = DeserializeTensor(reader, size, sizeof(ulong), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.BFloat16: { + var tensor = DeserializeTensor(reader, size, sizeof(ushort), shape); + namedOnnxValue = NamedOnnxValue.CreateFromTensor(name, tensor); + break; + } + case TensorElementType.Complex64: + case TensorElementType.Complex128: + case TensorElementType.DataTypeMax: + default: + throw new NotSupportedException($"[TensorCache] Unsupported tensor element type: {dtype}."); + } + + return namedOnnxValue; + } + + private static Tensor DeserializeTensor(BinaryReader reader, int size, int typeSize, ReadOnlySpan shape) + { + var bytes = reader.ReadBytes(size * typeSize); + var data = new T[size]; + Buffer.BlockCopy(bytes, 0, data, 0, bytes.Length); + Tensor tensor = new DenseTensor(data, shape); + return tensor; + } + } +} diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 503c6cecb..7327717a9 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Runtime.CompilerServices; using System.Text; +using K4os.Hash.xxHash; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; @@ -19,6 +20,7 @@ public class DsPitch : IDisposable string rootPath; DsConfig dsConfig; List phonemes; + ulong linguisticHash; InferenceSession linguisticModel; InferenceSession pitchModel; IG2p g2p; @@ -39,7 +41,9 @@ public DsPitch(string rootPath) phonemes = File.ReadLines(phonemesPath, Encoding.UTF8).ToList(); //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); - linguisticModel = Onnx.getInferenceSession(linguisticModelPath); + var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath); + linguisticHash = XXH64.DigestOf(linguisticModelBytes); + linguisticModel = Onnx.getInferenceSession(linguisticModelBytes); var pitchModelPath = Path.Join(rootPath, dsConfig.pitch); pitchModel = Onnx.getInferenceSession(pitchModelPath); frameMs = 1000f * dsConfig.hop_size / dsConfig.sample_rate; @@ -123,7 +127,14 @@ public RenderPitchResult Process(RenderPhrase phrase){ } Onnx.VerifyInputNames(linguisticModel, linguisticInputs); - var linguisticOutputs = linguisticModel.Run(linguisticInputs); + var linguisticCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(linguisticHash, linguisticInputs) + : null; + var linguisticOutputs = linguisticCache?.Load(); + if (linguisticOutputs is null) { + linguisticOutputs = linguisticModel.Run(linguisticInputs).Cast().ToList(); + linguisticCache?.Save(linguisticOutputs); + } Tensor encoder_out = linguisticOutputs .Where(o => o.Name == "encoder_out") .First() diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs index dd71f66ee..dd2106940 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs @@ -12,6 +12,7 @@ using OpenUtau.Core.Render; using OpenUtau.Core.SignalChain; using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; using Serilog; namespace OpenUtau.Core.DiffSinger { @@ -292,28 +293,40 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati .Reshape(new int[] { 1, tension.Length }))); } } - Tensor mel; - lock(acousticModel){ - if(cancellation.IsCancellationRequested) { - return null; + Onnx.VerifyInputNames(acousticModel, acousticInputs); + var acousticCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(singer.acousticHash, acousticInputs) + : null; + var acousticOutputs = acousticCache?.Load(); + if (acousticOutputs is null) { + lock(acousticModel){ + if(cancellation.IsCancellationRequested) { + return null; + } + acousticOutputs = acousticModel.Run(acousticInputs).Cast().ToList(); } - Onnx.VerifyInputNames(acousticModel, acousticInputs); - var acousticOutputs = acousticModel.Run(acousticInputs); - mel = acousticOutputs.First().AsTensor().Clone(); + acousticCache?.Save(acousticOutputs); } + Tensor mel = acousticOutputs.First().AsTensor().Clone(); //vocoder //waveform = session.run(['waveform'], {'mel': mel, 'f0': f0})[0] var vocoderInputs = new List(); vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("mel", mel)); vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor)); - Tensor samplesTensor; - lock(vocoder){ - if(cancellation.IsCancellationRequested) { - return null; + var vocoderCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(vocoder.hash, vocoderInputs) + : null; + var vocoderOutputs = vocoderCache?.Load(); + if (vocoderOutputs is null) { + lock(vocoder){ + if(cancellation.IsCancellationRequested) { + return null; + } + vocoderOutputs = vocoder.session.Run(vocoderInputs).Cast().ToList(); } - var vocoderOutputs = vocoder.session.Run(vocoderInputs); - samplesTensor = vocoderOutputs.First().AsTensor(); + vocoderCache?.Save(vocoderOutputs); } + Tensor samplesTensor = vocoderOutputs.First().AsTensor(); //Check the size of samplesTensor int[] expectedShape = new int[] { 1, -1 }; if(!DiffSingerUtils.ValidateShape(samplesTensor, expectedShape)){ diff --git a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs index db02db87d..12742d963 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; using System.Text; +using K4os.Hash.xxHash; using OpenUtau.Classic; using OpenUtau.Core.Ustx; using Serilog; @@ -44,6 +45,7 @@ class DiffSingerSinger : USinger { public List phonemes = new List(); public DsConfig dsConfig; + public ulong acousticHash; public InferenceSession acousticSession = null; public DsVocoder vocoder = null; public DsPitch pitchPredictor = null; @@ -126,7 +128,10 @@ public override byte[] LoadPortrait() { public InferenceSession getAcousticSession() { if (acousticSession is null) { - acousticSession = Onnx.getInferenceSession(Path.Combine(Location, dsConfig.acoustic)); + var acousticPath = Path.Combine(Location, dsConfig.acoustic); + var acousticBytes = File.ReadAllBytes(acousticPath); + acousticHash = XXH64.DigestOf(acousticBytes); + acousticSession = Onnx.getInferenceSession(acousticBytes); } return acousticSession; } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs index 017d04ea2..82b89131b 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs @@ -3,7 +3,7 @@ using System.IO; using System.Linq; using System.Text; - +using K4os.Hash.xxHash; using Serilog; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; @@ -23,6 +23,8 @@ public class DsVariance : IDisposable{ string rootPath; DsConfig dsConfig; List phonemes; + ulong linguisticHash; + ulong varianceHash; InferenceSession linguisticModel; InferenceSession varianceModel; IG2p g2p; @@ -43,9 +45,13 @@ public DsVariance(string rootPath) phonemes = File.ReadLines(phonemesPath, Encoding.UTF8).ToList(); //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); - linguisticModel = Onnx.getInferenceSession(linguisticModelPath); + var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath); + linguisticHash = XXH64.DigestOf(linguisticModelBytes); + linguisticModel = Onnx.getInferenceSession(linguisticModelBytes); var varianceModelPath = Path.Join(rootPath, dsConfig.variance); - varianceModel = Onnx.getInferenceSession(varianceModelPath); + var varianceModelBytes = File.ReadAllBytes(varianceModelPath); + varianceHash = XXH64.DigestOf(varianceModelBytes); + varianceModel = Onnx.getInferenceSession(varianceModelBytes); frameMs = 1000f * dsConfig.hop_size / dsConfig.sample_rate; //Load g2p g2p = LoadG2p(rootPath); @@ -119,7 +125,14 @@ public VarianceResult Process(RenderPhrase phrase){ } Onnx.VerifyInputNames(linguisticModel, linguisticInputs); - var linguisticOutputs = linguisticModel.Run(linguisticInputs); + var linguisticCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(linguisticHash, linguisticInputs) + : null; + var linguisticOutputs = linguisticCache?.Load(); + if (linguisticOutputs is null) { + linguisticOutputs = linguisticModel.Run(linguisticInputs).Cast().ToList(); + linguisticCache?.Save(linguisticOutputs); + } Tensor encoder_out = linguisticOutputs .Where(o => o.Name == "encoder_out") .First() @@ -183,7 +196,14 @@ public VarianceResult Process(RenderPhrase phrase){ varianceInputs.Add(NamedOnnxValue.CreateFromTensor("spk_embed", spkEmbedTensor)); } Onnx.VerifyInputNames(varianceModel, varianceInputs); - var varianceOutputs = varianceModel.Run(varianceInputs); + var varianceCache = Preferences.Default.DiffSingerTensorCache + ? new DiffSingerCache(varianceHash, varianceInputs) + : null; + var varianceOutputs = varianceCache?.Load(); + if (varianceOutputs is null) { + varianceOutputs = varianceModel.Run(varianceInputs).Cast().ToList(); + varianceCache?.Save(varianceOutputs); + } Tensor? energy_pred = dsConfig.predict_energy ? varianceOutputs .Where(o => o.Name == "energy_pred") diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs b/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs index 1a5fc35e7..1b1188fd6 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs @@ -1,11 +1,13 @@ using System; using System.IO; +using K4os.Hash.xxHash; using Microsoft.ML.OnnxRuntime; namespace OpenUtau.Core.DiffSinger { public class DsVocoder : IDisposable { public string Location; public DsVocoderConfig config; + public ulong hash; public InferenceSession session; public int num_mel_bins => config.num_mel_bins; @@ -25,6 +27,7 @@ public DsVocoder(string name) { catch (Exception ex) { throw new Exception($"Error loading vocoder {name}. Please download vocoder from https://github.com/xunmengshe/OpenUtau/wiki/Vocoders"); } + hash = XXH64.DigestOf(model); session = Onnx.getInferenceSession(model); } diff --git a/OpenUtau.Core/Util/Preferences.cs b/OpenUtau.Core/Util/Preferences.cs index 0fabcc1fb..991c3d03d 100644 --- a/OpenUtau.Core/Util/Preferences.cs +++ b/OpenUtau.Core/Util/Preferences.cs @@ -144,6 +144,7 @@ public class SerializablePreferences { public int OnnxGpu = 0; public int DiffsingerSpeedup = 50; public int DiffSingerDepth = 1000; + public bool DiffSingerTensorCache = true; public string Language = string.Empty; public string SortingOrder = string.Empty; public List RecentFiles = new List(); diff --git a/OpenUtau/Strings/Strings.axaml b/OpenUtau/Strings/Strings.axaml index 2b51b8f0f..a05068c69 100644 --- a/OpenUtau/Strings/Strings.axaml +++ b/OpenUtau/Strings/Strings.axaml @@ -333,6 +333,7 @@ Warning: this option removes custom presets. Use track color in UI Cache Clear cache on quit + DiffSinger Tensor Cache Preferences Note: please restart OpenUtau after changing this item. Off diff --git a/OpenUtau/Strings/Strings.zh-CN.axaml b/OpenUtau/Strings/Strings.zh-CN.axaml index 94ac8bd55..250751713 100644 --- a/OpenUtau/Strings/Strings.zh-CN.axaml +++ b/OpenUtau/Strings/Strings.zh-CN.axaml @@ -318,6 +318,7 @@ 在界面上使用音轨颜色 缓存 退出时清空缓存 + DiffSinger 张量缓存 使用偏好 注意: 修改本项后请重启OpenUtau diff --git a/OpenUtau/ViewModels/PreferencesViewModel.cs b/OpenUtau/ViewModels/PreferencesViewModel.cs index 67f1a41c2..ea155e403 100644 --- a/OpenUtau/ViewModels/PreferencesViewModel.cs +++ b/OpenUtau/ViewModels/PreferencesViewModel.cs @@ -42,6 +42,7 @@ public AudioOutputDevice? AudioOutputDevice { public List DiffsingerSpeedupOptions { get; } = new List { 1, 5, 10, 20, 50, 100 }; [Reactive] public int DiffSingerDepth { get; set; } [Reactive] public int DiffsingerSpeedup { get; set; } + [Reactive] public bool DiffSingerTensorCache { get; set; } [Reactive] public bool HighThreads { get; set; } [Reactive] public int Theme { get; set; } [Reactive] public bool PenPlusDefault { get; set; } @@ -141,6 +142,7 @@ public PreferencesViewModel() { OnnxGpu = OnnxGpuOptions.FirstOrDefault(x => x.deviceId == Preferences.Default.OnnxGpu, OnnxGpuOptions[0]); DiffSingerDepth = Preferences.Default.DiffSingerDepth; DiffsingerSpeedup = Preferences.Default.DiffsingerSpeedup; + DiffSingerTensorCache = Preferences.Default.DiffSingerTensorCache; Theme = Preferences.Default.Theme; PenPlusDefault = Preferences.Default.PenPlusDefault; DegreeStyle = Preferences.Default.DegreeStyle; @@ -334,6 +336,11 @@ public PreferencesViewModel() { Preferences.Default.DiffSingerDepth = index; Preferences.Save(); }); + this.WhenAnyValue(vm => vm.DiffSingerTensorCache) + .Subscribe(useCache => { + Preferences.Default.DiffSingerTensorCache = useCache; + Preferences.Save(); + }); } public void TestAudioOutputDevice() { diff --git a/OpenUtau/Views/PreferencesDialog.axaml b/OpenUtau/Views/PreferencesDialog.axaml index b5b4f81eb..483ed30ee 100644 --- a/OpenUtau/Views/PreferencesDialog.axaml +++ b/OpenUtau/Views/PreferencesDialog.axaml @@ -97,10 +97,16 @@ - - - - + + + + + + + + + + From fb834dc87d42f19f8c95435ea7b725a13d7efc01 Mon Sep 17 00:00:00 2001 From: maiko3tattun Date: Thu, 28 Mar 2024 22:51:12 +0900 Subject: [PATCH 02/27] Update Japanese translations --- OpenUtau/Strings/Strings.ja-JP.axaml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/OpenUtau/Strings/Strings.ja-JP.axaml b/OpenUtau/Strings/Strings.ja-JP.axaml index b01dc9159..357b9ccce 100644 --- a/OpenUtau/Strings/Strings.ja-JP.axaml +++ b/OpenUtau/Strings/Strings.ja-JP.axaml @@ -253,7 +253,7 @@ 音符の隙間を埋める ピッチ曲線(PITD)をピッチ点に変換 ビブラートを削除 - + 音符の重複を修正 漢字をピンインへ クロスフェードを長くする レンダリング済みピッチの読み込み @@ -325,7 +325,7 @@ Solfège (ドレミファソラシ) 言語 ピアノロール上に他トラックの音符を表示 - + ピアノロールウィンドウにアイコンを表示 ピアノロール上に背景イラストを表示 シンガー名の表示言語 テーマ @@ -346,7 +346,7 @@ 深い階層のフォルダも読み込む リセット 選択 - + ペン+ツールをデフォルトで使用する 再生 自動スクロール 自動スクロールモード @@ -379,7 +379,7 @@ Note: エンジンを使用するには、使用したいエンジンのDLLまたはEXEファイルをOpenUTAUフォルダ内のResamplersフォルダに追加し、選択してください。 - + ミュート中のトラックをレンダリングしない @@ -409,11 +409,11 @@ ズームイン ズームアウト サンプルボイスを再生 - - - - - + シンガーの配布パッケージを作成 + 自作音源の配布用zipファイルを作成します。 + 以下のファイルは配布パッケージに含まれません(gitignore形式で記述):--> + 作成 + 指定したタイプのファイルを配布パッケージに含めないようにする readme.txtを開く readme.txtが見つかりません 再読み込み @@ -450,7 +450,7 @@ テキストファイルのエンコード テキストが文字化けしないエンコードを選択してください。 - + エイリアスの手動指定 左クリック: 表情を設定 右クリック: 表情をリセット このレンダラーでは表情をサポートしていません。 @@ -487,7 +487,7 @@ トラックの名前を変更 レンダラーを選択 シンガーを選択 - + ソロ ソロ (他トラックのソロを解除しない) ソロ (他トラックのソロを解除) 全トラックのソロを解除 From a6fa9502f03716967e92b985073877818f3f8586 Mon Sep 17 00:00:00 2001 From: SoulMelody Date: Fri, 29 Mar 2024 21:03:26 +0800 Subject: [PATCH 03/27] Fix font query result with multiple comma-separated values --- OpenUtau/Program.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/OpenUtau/Program.cs b/OpenUtau/Program.cs index 5bd81c28a..26c2b02d9 100644 --- a/OpenUtau/Program.cs +++ b/OpenUtau/Program.cs @@ -63,7 +63,8 @@ public static AppBuilder BuildAvaloniaApp() { string fontFamily = process.StandardOutput.ReadToEnd(); if (!string.IsNullOrEmpty(fontFamily)) { - fontOptions.DefaultFamilyName = fontFamily; + string [] fontFamilies = fontFamily.Split(','); + fontOptions.DefaultFamilyName = fontFamilies[0]; } } return AppBuilder.Configure() From 261a33e62e723547fc42773f078153b728107ac2 Mon Sep 17 00:00:00 2001 From: maiko3tattun Date: Fri, 29 Mar 2024 22:35:33 +0900 Subject: [PATCH 04/27] Translate error messages and put them in the expander --- OpenUtau.Core/Classic/Frq.cs | 2 +- OpenUtau.Core/Commands/Notifications.cs | 14 +++++++++++ OpenUtau.Core/PlaybackManager.cs | 10 ++++---- OpenUtau.Core/Render/RenderEngine.cs | 2 +- OpenUtau/Strings/Strings.axaml | 8 +++++++ OpenUtau/Strings/Strings.ja-JP.axaml | 8 +++++++ OpenUtau/ViewModels/EditSubbanksViewModel.cs | 4 ++-- OpenUtau/ViewModels/MainWindowViewModel.cs | 6 ++--- OpenUtau/ViewModels/PianoRollViewModel.cs | 2 +- OpenUtau/ViewModels/SingersViewModel.cs | 14 +++++------ OpenUtau/Views/EditSubbanksDialog.axaml.cs | 4 ++-- OpenUtau/Views/MainWindow.axaml.cs | 6 ++++- OpenUtau/Views/MessageBox.axaml | 11 +++++++++ OpenUtau/Views/MessageBox.axaml.cs | 25 ++++++++++++++++---- OpenUtau/Views/PianoRollWindow.axaml.cs | 4 ++-- 15 files changed, 90 insertions(+), 30 deletions(-) diff --git a/OpenUtau.Core/Classic/Frq.cs b/OpenUtau.Core/Classic/Frq.cs index 07a54c3b7..3a238e8db 100644 --- a/OpenUtau.Core/Classic/Frq.cs +++ b/OpenUtau.Core/Classic/Frq.cs @@ -45,7 +45,7 @@ public bool Load(string wavPath) { } return true; } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to load frq file", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.load", ": frq file", e)); return false; } } diff --git a/OpenUtau.Core/Commands/Notifications.cs b/OpenUtau.Core/Commands/Notifications.cs index 4ecea51e3..b1f6ce18f 100644 --- a/OpenUtau.Core/Commands/Notifications.cs +++ b/OpenUtau.Core/Commands/Notifications.cs @@ -26,6 +26,20 @@ public ErrorMessageNotification(string message, Exception e) { public override string ToString() => $"Error message: {message} {e}"; } + public class ErrorMessageNotificationWithTranslation : ErrorMessageNotification { + public readonly string stringKey = string.Empty; + public ErrorMessageNotificationWithTranslation(string stringKey) : base(string.Empty) { + this.stringKey = stringKey; + } + public ErrorMessageNotificationWithTranslation(string stringKey, Exception e) : base(e) { + this.stringKey = stringKey; + } + public ErrorMessageNotificationWithTranslation(string stringKey, string additionalMessage, Exception e) : base(additionalMessage, e) { + this.stringKey = stringKey; + } + public override string ToString() => $"Error message: {stringKey}{message} {e}"; + } + public class LoadingNotification : UNotification { public readonly Type window; public readonly bool startLoading; diff --git a/OpenUtau.Core/PlaybackManager.cs b/OpenUtau.Core/PlaybackManager.cs index 3ce615047..2961bdc22 100644 --- a/OpenUtau.Core/PlaybackManager.cs +++ b/OpenUtau.Core/PlaybackManager.cs @@ -132,7 +132,7 @@ private void Render(UProject project, int tick, int endTick, int trackNo) { } catch (Exception e) { Log.Error(e, "Failed to render."); StopPlayback(); - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to render.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.render", e)); } }); } @@ -161,10 +161,10 @@ await Task.Run(() => { WaveFileWriter.CreateWaveFile16(exportPath, new ExportAdapter(projectMix).ToMono(1, 0)); DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Exported to {exportPath}.")); } catch (IOException ioe) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification($"Failed to export {exportPath}.", ioe)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.export", $": {exportPath}", ioe)); DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Failed to export {exportPath}.")); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to render.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.render", e)); DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Failed to render.")); } }); @@ -189,10 +189,10 @@ await Task.Run(() => { DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Exported to {file}.")); } } catch (IOException ioe) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification($"Failed to export {file}.", ioe)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.export", $": {file}", ioe)); DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Failed to export {file}.")); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to render.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.render", e)); DocManager.Inst.ExecuteCmd(new ProgressBarNotification(0, $"Failed to render.")); } }); diff --git a/OpenUtau.Core/Render/RenderEngine.cs b/OpenUtau.Core/Render/RenderEngine.cs index 51e81c2e5..6749de200 100644 --- a/OpenUtau.Core/Render/RenderEngine.cs +++ b/OpenUtau.Core/Render/RenderEngine.cs @@ -107,7 +107,7 @@ public Tuple> RenderMixdown(TaskScheduler uiScheduler, ref if (task.IsFaulted && !wait) { Log.Error(task.Exception.Flatten(), "Failed to render."); PlaybackManager.Inst.StopPlayback(); - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to render.", task.Exception)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.render", task.Exception)); } }, CancellationToken.None, TaskContinuationOptions.OnlyOnFaulted, uiScheduler); if (wait) { diff --git a/OpenUtau/Strings/Strings.axaml b/OpenUtau/Strings/Strings.axaml index 2b27834a7..1a43d91c0 100644 --- a/OpenUtau/Strings/Strings.axaml +++ b/OpenUtau/Strings/Strings.axaml @@ -61,6 +61,14 @@ Applies to all notes in this track: Error + Error Details + Failed to export + Failed to load + Failed to open + Failed to render. + Failed to run editing macro. + Failed to save + Failed to save singer config file. Abbreviation Apply diff --git a/OpenUtau/Strings/Strings.ja-JP.axaml b/OpenUtau/Strings/Strings.ja-JP.axaml index b01dc9159..1fd65e414 100644 --- a/OpenUtau/Strings/Strings.ja-JP.axaml +++ b/OpenUtau/Strings/Strings.ja-JP.axaml @@ -61,6 +61,14 @@ このトラック内のすべての音符に適用されます: エラー + エラーの詳細 + エクスポートに失敗しました + 読み込みに失敗しました + 保存に失敗しました + レンダリングに失敗しました。 + 一括処理に失敗しました。 + 保存に失敗しました + シンガー設定の保存に失敗しました。 パラメータの略称 適用 diff --git a/OpenUtau/ViewModels/EditSubbanksViewModel.cs b/OpenUtau/ViewModels/EditSubbanksViewModel.cs index e0b4ba820..7d09634fd 100644 --- a/OpenUtau/ViewModels/EditSubbanksViewModel.cs +++ b/OpenUtau/ViewModels/EditSubbanksViewModel.cs @@ -109,7 +109,7 @@ public void LoadSubbanks() { } SelectedColor = Colors[0]; } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to load subbanks", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.load", ": subbanks", e)); } } @@ -184,7 +184,7 @@ public void SaveSubbanks() { bankConfig.Save(stream); } } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to save subbanks", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.save", ": subbanks", e)); } LoadSubbanks(); } diff --git a/OpenUtau/ViewModels/MainWindowViewModel.cs b/OpenUtau/ViewModels/MainWindowViewModel.cs index d5b42220d..4e0a78bf0 100644 --- a/OpenUtau/ViewModels/MainWindowViewModel.cs +++ b/OpenUtau/ViewModels/MainWindowViewModel.cs @@ -63,7 +63,7 @@ public MainWindowViewModel() { try { OpenProject(new[] { file }); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to open recent.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.open", ": recent project", e)); } }); OpenTemplateCommand = ReactiveCommand.Create(file => { @@ -72,7 +72,7 @@ public MainWindowViewModel() { DocManager.Inst.Project.Saved = false; DocManager.Inst.Project.FilePath = string.Empty; } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to open template.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.open", ": project template", e)); } }); PartDeleteCommand = ReactiveCommand.Create(part => { @@ -111,7 +111,7 @@ public void NewProject() { DocManager.Inst.Project.FilePath = string.Empty; return; } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to load default template.", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.load", ": default template", e)); } } DocManager.Inst.ExecuteCmd(new LoadProjectNotification(Core.Format.Ustx.Create())); diff --git a/OpenUtau/ViewModels/PianoRollViewModel.cs b/OpenUtau/ViewModels/PianoRollViewModel.cs index 0620be99f..46b25812f 100644 --- a/OpenUtau/ViewModels/PianoRollViewModel.cs +++ b/OpenUtau/ViewModels/PianoRollViewModel.cs @@ -168,7 +168,7 @@ public PianoRollViewModel() { try{ edit.Run(NotesViewModel.Project, NotesViewModel.Part, NotesViewModel.Selection.ToList(), DocManager.Inst); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to run editing macro", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.runeditingmacro", e)); } } }); diff --git a/OpenUtau/ViewModels/SingersViewModel.cs b/OpenUtau/ViewModels/SingersViewModel.cs index f0eab58ad..5e045cde0 100644 --- a/OpenUtau/ViewModels/SingersViewModel.cs +++ b/OpenUtau/ViewModels/SingersViewModel.cs @@ -158,7 +158,7 @@ private void SetEncoding(Encoding encoding) { try { ModifyConfig(Singer, config => config.TextFileEncoding = encoding.WebName); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set encoding", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -170,7 +170,7 @@ public void SetImage(string filepath) { try { ModifyConfig(Singer, config => config.Image = filepath); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set image", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -182,7 +182,7 @@ public void SetPortrait(string filepath) { try { ModifyConfig(Singer, config => config.Portrait = filepath); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set portrait", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -194,7 +194,7 @@ private void SetSingerType(string singerType) { try { ModifyConfig(Singer, config => config.SingerType = singerType); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set singer type", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -206,7 +206,7 @@ private void SetDefaultPhonemizer(Api.PhonemizerFactory factory) { try { ModifyConfig(Singer, config => config.DefaultPhonemizer = factory.type.FullName ?? string.Empty); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set portrait", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -218,7 +218,7 @@ public void SetUseFilenameAsAlias() { try { ModifyConfig(Singer, config => config.UseFilenameAsAlias = !this.UseFilenameAsAlias); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to set use filename", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.savesingerconfig", e)); } Refresh(); } @@ -338,7 +338,7 @@ public void LoadSubbanks() { try { Subbanks.AddRange(Singer.Subbanks); } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to load subbanks", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.load", ": subbanks", e)); } } diff --git a/OpenUtau/Views/EditSubbanksDialog.axaml.cs b/OpenUtau/Views/EditSubbanksDialog.axaml.cs index cae15e655..6bed81065 100644 --- a/OpenUtau/Views/EditSubbanksDialog.axaml.cs +++ b/OpenUtau/Views/EditSubbanksDialog.axaml.cs @@ -83,7 +83,7 @@ async void OnImportMap(object sender, RoutedEventArgs args) { } } } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to load prefix map", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.load", ": prefix map", e)); } } } @@ -102,7 +102,7 @@ async void OnExportMap(object sender, RoutedEventArgs args) { } } } catch (Exception e) { - DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("Failed to save prefix map", e)); + DocManager.Inst.ExecuteCmd(new ErrorMessageNotificationWithTranslation("errors.failed.save", ": prefix map", e)); } } } diff --git a/OpenUtau/Views/MainWindow.axaml.cs b/OpenUtau/Views/MainWindow.axaml.cs index 48a339c91..48b7130ab 100644 --- a/OpenUtau/Views/MainWindow.axaml.cs +++ b/OpenUtau/Views/MainWindow.axaml.cs @@ -1328,7 +1328,11 @@ public void OnNext(UCommand cmd, bool isUndo) { MessageBox.MessageBoxButtons.Ok); break; default: - MessageBox.ShowError(this, notif.message, notif.e); + if (notif is ErrorMessageNotificationWithTranslation translatedNotif) { + MessageBox.ShowError(this, ThemeManager.GetString(translatedNotif.stringKey) + translatedNotif.message, translatedNotif.e); + } else { + MessageBox.ShowError(this, notif.message, notif.e); + } break; } } else if (cmd is LoadingNotification loadingNotif && loadingNotif.window == typeof(MainWindow)) { diff --git a/OpenUtau/Views/MessageBox.axaml b/OpenUtau/Views/MessageBox.axaml index fff696672..1f63b1444 100644 --- a/OpenUtau/Views/MessageBox.axaml +++ b/OpenUtau/Views/MessageBox.axaml @@ -13,7 +13,11 @@ + + + + + + From 5fc17019424ce66941c49cc8d65f3f72920ce486 Mon Sep 17 00:00:00 2001 From: maiko3tattun Date: Sat, 30 Mar 2024 23:45:17 +0900 Subject: [PATCH 09/27] Cache frq --- OpenUtau.Core/Classic/ClassicSinger.cs | 1 + OpenUtau.Core/Classic/Frq.cs | 90 +++++++++++++++++++ OpenUtau.Core/Render/RenderPhrase.cs | 116 +++++++++++++------------ OpenUtau.Core/Ustx/UPhoneme.cs | 74 ---------------- OpenUtau.Core/Ustx/USinger.cs | 1 + 5 files changed, 152 insertions(+), 130 deletions(-) diff --git a/OpenUtau.Core/Classic/ClassicSinger.cs b/OpenUtau.Core/Classic/ClassicSinger.cs index f4c356211..6c78d3f57 100644 --- a/OpenUtau.Core/Classic/ClassicSinger.cs +++ b/OpenUtau.Core/Classic/ClassicSinger.cs @@ -44,6 +44,7 @@ public class ClassicSinger : USinger { OtoWatcher otoWatcher; public bool? UseFilenameAsAlias { get => voicebank.UseFilenameAsAlias; set => voicebank.UseFilenameAsAlias = value; } + public Dictionary Frqs { get; set; } = new Dictionary(); public ClassicSinger(Voicebank voicebank) { this.voicebank = voicebank; diff --git a/OpenUtau.Core/Classic/Frq.cs b/OpenUtau.Core/Classic/Frq.cs index 07a54c3b7..1632b2aa0 100644 --- a/OpenUtau.Core/Classic/Frq.cs +++ b/OpenUtau.Core/Classic/Frq.cs @@ -1,10 +1,99 @@ using System; +using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; +using NAudio.Wave; using OpenUtau.Core; +using OpenUtau.Core.Ustx; namespace OpenUtau.Classic { + public class OtoFrq { + public double[] toneDiffFix = new double[0]; + public double[] toneDiffStretch = new double[0]; + public int hopSize; + public bool loaded = false; + + public OtoFrq(UOto oto, Dictionary dict) { + if (!dict.TryGetValue(oto.File, out var frq)) { + frq = new Frq(); + if (frq.Load(oto.File)){ + dict.Add(oto.File, frq); + } else { + frq = null; + } + } + if(frq != null && frq.wavSampleLength != - 1) { + this.hopSize = frq.hopSize; + + if (frq.wavSampleLength == 0) { + try { + using (var waveStream = Core.Format.Wave.OpenFile(oto.File)) { + var sampleProvider = waveStream.ToSampleProvider(); + if (sampleProvider.WaveFormat.SampleRate == 44100) { + frq.wavSampleLength = Core.Format.Wave.GetSamples(sampleProvider).Length; + } else { + frq.wavSampleLength = -1; + } + } + } catch { + frq.wavSampleLength = - 1; + } + } + + if (frq.wavSampleLength > 0) { + int offset = (int)Math.Floor(oto.Offset * 44100 / 1000 / frq.hopSize); // frq samples + int consonant = (int)Math.Floor((oto.Offset + oto.Consonant) * 44100 / 1000 / frq.hopSize); + int cutoff = oto.Cutoff < 0 ? + (int)Math.Floor((oto.Offset - oto.Cutoff) * 44100 / 1000 / frq.hopSize) + : frq.wavSampleLength - (int)Math.Floor(oto.Cutoff * 44100 / 1000 / frq.hopSize); + var completionF0 = Completion(frq.f0); + var averageTone = MusicMath.FreqToTone(frq.averageF0); + toneDiffFix = completionF0.Skip(offset).Take(consonant - offset).Select(f => MusicMath.FreqToTone(f) - averageTone).ToArray(); + toneDiffStretch = completionF0.Skip(consonant).Take(cutoff - consonant).Select(f => MusicMath.FreqToTone(f) - averageTone).ToArray(); + + loaded = true; + } + } + } + + private double[] Completion(double[] frqs) { + var list = new List(); + for (int i = 0; i < frqs.Length; i++) { + if (frqs[i] <= 60) { + int min = i - 1; + double minFrq = 0; + while (min >= 0) { + if (frqs[min] > 60) { + minFrq = frqs[min]; + break; + } + min--; + } + int max = i + 1; + double maxFrq = 0; + while (max < frqs.Length) { + if (frqs[max] > 60) { + maxFrq = frqs[max]; + break; + } + max++; + } + if (minFrq <= 60) { + list.Add(maxFrq); + } else if (maxFrq <= 60) { + list.Add(minFrq); + } else { + list.Add(MusicMath.Linear(min, max, minFrq, maxFrq, i)); + } + } else { + list.Add(frqs[i]); + } + } + return list.ToArray(); + } + } + public class Frq { public const int kHopSize = 256; @@ -12,6 +101,7 @@ public class Frq { public double averageF0; public double[] f0 = new double[0]; public double[] amp = new double[0]; + public int wavSampleLength = 0; /// /// If the wav path is null (machine learning voicebank), return false. diff --git a/OpenUtau.Core/Render/RenderPhrase.cs b/OpenUtau.Core/Render/RenderPhrase.cs index e34cbabe4..f2be55af8 100644 --- a/OpenUtau.Core/Render/RenderPhrase.cs +++ b/OpenUtau.Core/Render/RenderPhrase.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Numerics; using K4os.Hash.xxHash; +using OpenUtau.Classic; using OpenUtau.Core.Ustx; using Serilog; @@ -299,69 +300,72 @@ internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerab } } // Mod plus - if (track.TryGetExpDescriptor(project, Format.Ustx.MODP, out var modp) && renderer.SupportsExpression(modp)) { + if (track.TryGetExpDescriptor(project, Format.Ustx.MODP, out var modp) && renderer.SupportsExpression(modp) && singer is ClassicSinger cSinger) { foreach (var phoneme in phonemes) { - var mod = phoneme.GetExpression(project, track, Format.Ustx.MODP).Item1; - if (mod == 0) { + var phonemeModp = phoneme.GetExpression(project, track, Format.Ustx.MODP).Item1; + if (phonemeModp == 0) { continue; } try { - if (phoneme.TryGetFrq(out var frqFix, out var frqStretch, out double average, out int hopSize)) { - UTempo[] noteTempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position, part.position + phoneme.End); - var tempo = noteTempos[0].bpm; // compromise 妥協! - var frqIntervalTick = MusicMath.TempoMsToTick(tempo, (double)1 * 1000 / 44100 * hopSize); - double consonantStretch = Math.Pow(2f, 1.0f - phoneme.GetExpression(project, track, Format.Ustx.VEL).Item1 / 100f); - - var preutter = MusicMath.TempoMsToTick(tempo, Math.Min(phoneme.preutter, phoneme.oto.Preutter * consonantStretch)); - int startIndex = Math.Max(0, (int)Math.Floor((phoneme.position - pitchStart - preutter) / pitchInterval)); - int position = (int)Math.Round((double)((phoneme.position - pitchStart) / pitchInterval)); - int startStretch = position + (int)Math.Round(MusicMath.TempoMsToTick(tempo, (phoneme.oto.Consonant - phoneme.oto.Preutter) * consonantStretch) / pitchInterval); - int endIndex = Math.Min(pitches.Length, (int)Math.Ceiling(phoneme.End - pitchStart - MusicMath.TempoMsToTick(tempo, phoneme.tailIntrude - phoneme.tailOverlap)) / pitchInterval); - - frqFix = frqFix.Select(f => f - average).ToArray(); - frqStretch = frqStretch.Select(f => f - average).ToArray(); - double stretch = 1; - if (frqStretch.Length * frqIntervalTick < ((double)endIndex - startStretch) * pitchInterval) { - stretch = ((double)endIndex - startStretch) * pitchInterval / (frqStretch.Length * frqIntervalTick); - } - var env0 = new Vector2(0, 0); - var env1 = new Vector2((phoneme.envelope.data[1].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100); - var env3 = new Vector2((phoneme.envelope.data[3].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100); - var env4 = new Vector2(1, 0); - - for (int i = 0; startStretch + i <= endIndex; i++) { - var pit = startStretch + i; - if (pit >= pitches.Length) break; - var frq = i * (pitchInterval / frqIntervalTick) / stretch; - var frqMin = Math.Clamp((int)Math.Floor(frq), 0, frqStretch.Length - 1); - var frqMax = Math.Clamp((int)Math.Ceiling(frq), 0, frqStretch.Length - 1); - var diff = MusicMath.Linear(frqMin, frqMax, frqStretch[frqMin], frqStretch[frqMax], frq); - diff = diff * mod / 100; - diff = Fade(diff, pit); - pitches[pit] = pitches[pit] + (float)(diff * 100); - } - for (int i = 0; startStretch + i - 1 >= startIndex; i--) { - var pit = startStretch + i - 1; - if (pit > endIndex || pit >= pitches.Length) continue; - var frq = frqFix.Length + (i * (pitchInterval / frqIntervalTick) / consonantStretch); - var frqMin = Math.Clamp((int)Math.Floor(frq), 0, frqFix.Length - 1); - var frqMax = Math.Clamp((int)Math.Ceiling(frq), 0, frqFix.Length - 1); - var diff = MusicMath.Linear(frqMin, frqMax, frqFix[frqMin], frqFix[frqMax], frq); - diff = diff * mod / 100; - diff = Fade(diff, pit); - pitches[pit] = pitches[pit] + (float)(diff * 100); + if (phoneme.oto.Frq == null) { + phoneme.oto.Frq = new OtoFrq(phoneme.oto, cSinger.Frqs); + } + if (phoneme.oto.Frq.loaded == false) { + continue; + } + var frq = phoneme.oto.Frq; + UTempo[] noteTempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position, part.position + phoneme.End); + var tempo = noteTempos[0].bpm; // compromise 妥協! + var frqIntervalTick = MusicMath.TempoMsToTick(tempo, (double)1 * 1000 / 44100 * frq.hopSize); + double consonantStretch = Math.Pow(2f, 1.0f - phoneme.GetExpression(project, track, Format.Ustx.VEL).Item1 / 100f); + + var preutter = MusicMath.TempoMsToTick(tempo, Math.Min(phoneme.preutter, phoneme.oto.Preutter * consonantStretch)); + int startIndex = Math.Max(0, (int)Math.Floor((phoneme.position - pitchStart - preutter) / pitchInterval)); + int position = (int)Math.Round((double)((phoneme.position - pitchStart) / pitchInterval)); + int startStretch = position + (int)Math.Round(MusicMath.TempoMsToTick(tempo, (phoneme.oto.Consonant - phoneme.oto.Preutter) * consonantStretch) / pitchInterval); + int endIndex = Math.Min(pitches.Length, (int)Math.Ceiling(phoneme.End - pitchStart - MusicMath.TempoMsToTick(tempo, phoneme.tailIntrude - phoneme.tailOverlap)) / pitchInterval); + + double stretch = 1; + if (frq.toneDiffStretch.Length * frqIntervalTick < ((double)endIndex - startStretch) * pitchInterval) { + stretch = ((double)endIndex - startStretch) * pitchInterval / (frq.toneDiffStretch.Length * frqIntervalTick); + } + var env0 = new Vector2(0, 0); + var env1 = new Vector2((phoneme.envelope.data[1].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100); + var env3 = new Vector2((phoneme.envelope.data[3].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100); + var env4 = new Vector2(1, 0); + + for (int i = 0; startStretch + i <= endIndex; i++) { + var pit = startStretch + i; + if (pit >= pitches.Length) break; + var frqPoint = i * (pitchInterval / frqIntervalTick) / stretch; + var frqPointMin = Math.Clamp((int)Math.Floor(frqPoint), 0, frq.toneDiffStretch.Length - 1); + var frqPointMax = Math.Clamp((int)Math.Ceiling(frqPoint), 0, frq.toneDiffStretch.Length - 1); + var diff = MusicMath.Linear(frqPointMin, frqPointMax, frq.toneDiffStretch[frqPointMin], frq.toneDiffStretch[frqPointMax], frqPoint); + diff = diff * phonemeModp / 100; + diff = Fade(diff, pit); + pitches[pit] = pitches[pit] + (float)(diff * 100); + } + for (int i = 0; startStretch + i - 1 >= startIndex; i--) { + var pit = startStretch + i - 1; + if (pit > endIndex || pit >= pitches.Length) continue; + var frqPoint = frq.toneDiffFix.Length + (i * (pitchInterval / frqIntervalTick) / consonantStretch); + var frqPointMin = Math.Clamp((int)Math.Floor(frqPoint), 0, frq.toneDiffFix.Length - 1); + var frqPointMax = Math.Clamp((int)Math.Ceiling(frqPoint), 0, frq.toneDiffFix.Length - 1); + var diff = MusicMath.Linear(frqPointMin, frqPointMax, frq.toneDiffFix[frqPointMin], frq.toneDiffFix[frqPointMax], frqPoint); + diff = diff * phonemeModp / 100; + diff = Fade(diff, pit); + pitches[pit] = pitches[pit] + (float)(diff * 100); + } + double Fade(double diff, int pit) { + var percentage = (double)(pit - startIndex) / (endIndex - startIndex); + if (phoneme.Next != null && phoneme.End == phoneme.Next.position && percentage > env3.X) { + diff = diff * Math.Clamp(MusicMath.Linear(env3.X, env4.X, env3.Y, env4.Y, percentage), 0, 100) / 100; } - double Fade(double diff, int pit) { - var percentage = (double)(pit - startIndex) / (endIndex - startIndex); - if (phoneme.Next != null && phoneme.End == phoneme.Next.position && percentage > env3.X) { - diff = diff * Math.Clamp(MusicMath.Linear(env3.X, env4.X, env3.Y, env4.Y, percentage), 0, 100) / 100; - } - if (phoneme.Prev != null && phoneme.Prev.End == phoneme.position && percentage < env1.X) { - diff = diff * Math.Clamp(MusicMath.Linear(env0.X, env1.X, env0.Y, env1.Y, percentage), 0, 100) / 100; - } - return diff; + if (phoneme.Prev != null && phoneme.Prev.End == phoneme.position && percentage < env1.X) { + diff = diff * Math.Clamp(MusicMath.Linear(env0.X, env1.X, env0.Y, env1.Y, percentage), 0, 100) / 100; } + return diff; } } catch(Exception e) { Log.Error(e, "Failed to compute mod plus."); diff --git a/OpenUtau.Core/Ustx/UPhoneme.cs b/OpenUtau.Core/Ustx/UPhoneme.cs index 10003f6db..6c46e5016 100644 --- a/OpenUtau.Core/Ustx/UPhoneme.cs +++ b/OpenUtau.Core/Ustx/UPhoneme.cs @@ -2,9 +2,6 @@ using System.Collections.Generic; using System.Linq; using System.Numerics; -using NAudio.Wave; -using OpenUtau.Classic; -using SharpCompress; using YamlDotNet.Serialization; namespace OpenUtau.Core.Ustx { @@ -237,77 +234,6 @@ public string GetVoiceColor(UProject project, UTrack track) { } return track.VoiceColorExp.options[index]; } - - public bool TryGetFrq(out double[] frqFix, out double[] frqStretch, out double average, out int hopSize) { - frqFix = new double[0]; - frqStretch = new double[0]; - average = 0; - hopSize = 0; - - var frq = new Frq(); - if (frq.Load(oto.File)) { - average = MusicMath.FreqToTone(frq.averageF0); // 1 = 1tone - hopSize = frq.hopSize; - - int wavLength; - using (var waveStream = Format.Wave.OpenFile(oto.File)) { - var sampleProvider = waveStream.ToSampleProvider(); - if (sampleProvider.WaveFormat.SampleRate != 44100) { - return false; - } - wavLength = Format.Wave.GetSamples(sampleProvider).Length; - } - - int offset = (int)Math.Floor(oto.Offset * 44100 / 1000 / frq.hopSize); // frq samples - int consonant = (int)Math.Floor((oto.Offset + oto.Consonant) * 44100 / 1000 / frq.hopSize); - int cutoff = oto.Cutoff < 0 ? - (int)Math.Floor((oto.Offset - oto.Cutoff) * 44100 / 1000 / frq.hopSize) - : wavLength - (int)Math.Floor(oto.Cutoff * 44100 / 1000 / frq.hopSize); - var avr = average; - var f0 = Completion(frq.f0); - frqFix = f0.Skip(offset).Take(consonant - offset).Select(f => MusicMath.FreqToTone(f)).ToArray(); - frqStretch = f0.Skip(consonant).Take(cutoff - consonant).Select(f => MusicMath.FreqToTone(f)).ToArray(); - - double[] Completion(double[] frqs) { - var list = new List(); - for (int i = 0; i < frqs.Length; i++) { - if (frqs[i] <= 0) { - int min = i - 1; - double minFrq = 0; - while (min >= 0) { - if (frqs[min] > 0) { - minFrq = frqs[min]; - break; - } - min--; - } - int max = i + 1; - double maxFrq = 0; - while (max < frqs.Length) { - if (frqs[max] > 0) { - maxFrq = frqs[max]; - break; - } - max++; - } - if(minFrq <= 0) { - list.Add(maxFrq); - } else if (maxFrq <= 0) { - list.Add(minFrq); - } else { - list.Add(MusicMath.Linear(min, max, minFrq, maxFrq, i)); - } - } else { - list.Add(frqs[i]); - } - } - return list.ToArray(); - } - return true; - } else { - return false; - } - } } public class UEnvelope { diff --git a/OpenUtau.Core/Ustx/USinger.cs b/OpenUtau.Core/Ustx/USinger.cs index 20def8742..907aa3512 100644 --- a/OpenUtau.Core/Ustx/USinger.cs +++ b/OpenUtau.Core/Ustx/USinger.cs @@ -52,6 +52,7 @@ public double Overlap { NotifyPropertyChanged(nameof(Overlap)); } } + public OtoFrq Frq { get;set; } public List SearchTerms { get; private set; } public event PropertyChangedEventHandler PropertyChanged; From f2c64ee2b4da95030896e061ed29b6908c63cd96 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 31 Mar 2024 18:36:19 +0900 Subject: [PATCH 10/27] Added the ability to display License --- OpenUtau.Core/Voicevox/VoicevoxConfig.cs | 98 +++++++++++++++++++--- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 5 ++ OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 5 ++ 3 files changed, 98 insertions(+), 10 deletions(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index 23e5ed7d6..b584996fb 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -10,6 +10,11 @@ using Serilog; using static OpenUtau.Api.Phonemizer; +/* + * This source code is partially based on the VOICEVOX engine. + * https://github.com/VOICEVOX/voicevox_engine/blob/master/LGPL_LICENSE + */ + namespace OpenUtau.Core.Voicevox { public class VoicevoxConfig { //Information that each Singer has @@ -31,6 +36,18 @@ public class VoicevoxConfig { public static VoicevoxConfig Load(USinger singer) { try { + var response = VoicevoxClient.Inst.SendRequest(new VoicevoxURL() { method = "GET", path = "/engine_manifest" }); + var jObj = JObject.Parse(response.Item1); + if (jObj.ContainsKey("detail")) { + Log.Error($"Response was incorrect. : {jObj}"); + } + var manifest = jObj.ToObject(); + manifest.SaveLicenses(singer.Location); + } catch { + Log.Error("Could not load Licenses."); + } + try { + var response = VoicevoxClient.Inst.SendRequest(new VoicevoxURL() { method = "GET", path = "/singers" }); var jObj = JObject.Parse(response.Item1); if (jObj.ContainsKey("detail")) { @@ -45,7 +62,7 @@ public static VoicevoxConfig Load(USinger singer) { var filePath = Path.Join(folderPath, "character.yaml"); if (!File.Exists(filePath)) { Directory.CreateDirectory(folderPath); - string typename = string.Empty ; + string typename = string.Empty; SingerTypeUtils.SingerTypeNames.TryGetValue(USingerType.Voicevox, out typename); var config = new VoicebankConfig() { Name = voicevoxConfig.name, @@ -69,7 +86,7 @@ public static VoicevoxConfig Load(USinger singer) { return new VoicevoxConfig(); } public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { - if(voicevoxConfig.style_infos == null) { + if (voicevoxConfig.style_infos == null) { var queryurl = new VoicevoxURL() { method = "GET", path = "/singer_info", query = new Dictionary { { "speaker_uuid", voicevoxConfig.speaker_uuid } } }; var response = VoicevoxClient.Inst.SendRequest(queryurl); var jObj = JObject.Parse(response.Item1); @@ -86,6 +103,67 @@ public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { } } + public class Engine_manifest { + public class Update_infos { + public string version { get; set; } + public IList descriptions { get; set; } + public IList contributors { get; set; } + + } + public class Dependency_licenses { + public string name { get; set; } + public string version { get; set; } + public string license { get; set; } + public string text { get; set; } + + } + public class Supported_features { + public bool adjust_mora_pitch { get; set; } + public bool adjust_phoneme_length { get; set; } + public bool adjust_speed_scale { get; set; } + public bool adjust_pitch_scale { get; set; } + public bool adjust_intonation_scale { get; set; } + public bool adjust_volume_scale { get; set; } + public bool interrogative_upspeak { get; set; } + public bool synthesis_morphing { get; set; } + public bool sing { get; set; } + public bool manage_library { get; set; } + + } + + public string manifest_version { get; set; } + public string name { get; set; } + public string brand_name { get; set; } + public string uuid { get; set; } + public string url { get; set; } + public string icon { get; set; } + public int default_sampling_rate { get; set; } + public int frame_rate { get; set; } + public string terms_of_service { get; set; } + public IList update_infos { get; set; } + public IList dependency_licenses { get; set; } + public string supported_vvlib_manifest_version { get; set; } + public Supported_features supported_features { get; set; } + + public void SaveLicenses(string location) { + var parentDirectory = Directory.GetParent(location).ToString(); + var licenseDirectory = Path.Join(parentDirectory, "Licenses"); + if (!Directory.Exists(licenseDirectory)) { + Directory.CreateDirectory(licenseDirectory); + } + var filePath = Path.Join(licenseDirectory, "terms_of_service.txt"); + if (!string.IsNullOrEmpty(terms_of_service)) { + File.WriteAllText(filePath, terms_of_service); + } + foreach (var item in dependency_licenses) { + filePath = Path.Join(licenseDirectory, $"{item.name}_License.txt"); + if (!string.IsNullOrEmpty(item.text)) { + File.WriteAllText(filePath, $"license:{item.license}\nversion:{item.version}\n\n" +item.text); + } + } + } + } + public class Phoneme_list { public string[] vowels; public string[] consonants; @@ -93,7 +171,7 @@ public class Phoneme_list { } public class Dictionary_list { - public Dictionary dict = new Dictionary(); + public Dictionary dict = new Dictionary(); public void Loaddic(string location) { try { @@ -112,12 +190,12 @@ public void Loaddic(string location) { } } - }catch (Exception e) { + } catch (Exception e) { Log.Error($"Failed to read dictionary file. : {e}"); } } - public string Lyrictodic(Note[][] notes,int index) { + public string Lyrictodic(Note[][] notes, int index) { if (dict.TryGetValue(notes[index][0].lyric, out var lyric_)) { if (string.IsNullOrEmpty(lyric_)) { return ""; @@ -144,7 +222,7 @@ public void SetInfo(VoicevoxConfig voicevoxConfig, string location) { Log.Information($"Begin setup of Voicevox SingerInfo."); try { var readmePath = Path.Join(location, "readme.txt"); - if (!string.IsNullOrEmpty(this.policy) && !File.Exists(readmePath)) { + if (!string.IsNullOrEmpty(this.policy)) { voicevoxConfig.policy = this.policy; File.WriteAllText(readmePath, this.policy); } @@ -167,15 +245,15 @@ public void SetInfo(VoicevoxConfig voicevoxConfig, string location) { voicevoxConfig.style_infos[i].id = this.style_infos[i].id; } } - } catch (Exception e){ + } catch (Exception e) { Log.Error($"Could not create character file. : {e}"); } Log.Information($"Voicevox SingerInfo setup complete."); } - public void checkAndSetFiles(string base64str,string filePath) { - if (!String.IsNullOrEmpty(base64str) && !File.Exists(filePath)) { - Base64.Base64ToFile(base64str, filePath); + public void checkAndSetFiles(string base64str, string filePath) { + if (!String.IsNullOrEmpty(base64str)) { + Base64.Base64ToFile(base64str, filePath); } } } diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 0ffc4f4fd..607b4f1ac 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -14,6 +14,11 @@ using Serilog; using static OpenUtau.Api.Phonemizer; +/* + * This source code is partially based on the VOICEVOX engine. + * https://github.com/VOICEVOX/voicevox_engine/blob/master/LGPL_LICENSE + */ + namespace OpenUtau.Core.Voicevox { public class VoicevoxRenderer : IRenderer { const string VOLC = VoicevoxUtils.VOLC; diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index adee594ff..79300767e 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -7,6 +7,11 @@ using Serilog; using static OpenUtau.Api.Phonemizer; +/* + * This source code is partially based on the VOICEVOX engine. + * https://github.com/VOICEVOX/voicevox_engine/blob/master/LGPL_LICENSE + */ + namespace OpenUtau.Core.Voicevox { public class Phonemes { public string phoneme; From 3a8ac65ec8a401a2f664921f9e3caebb57f97086 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 31 Mar 2024 18:39:32 +0900 Subject: [PATCH 11/27] Remove { get; set; } --- OpenUtau.Core/Voicevox/VoicevoxConfig.cs | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index b584996fb..60a83664a 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -105,45 +105,45 @@ public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { public class Engine_manifest { public class Update_infos { - public string version { get; set; } - public IList descriptions { get; set; } - public IList contributors { get; set; } + public string version; + public IList descriptions; + public IList contributors; } public class Dependency_licenses { - public string name { get; set; } - public string version { get; set; } - public string license { get; set; } - public string text { get; set; } + public string name; + public string version; + public string license; + public string text; } public class Supported_features { - public bool adjust_mora_pitch { get; set; } - public bool adjust_phoneme_length { get; set; } - public bool adjust_speed_scale { get; set; } - public bool adjust_pitch_scale { get; set; } - public bool adjust_intonation_scale { get; set; } - public bool adjust_volume_scale { get; set; } - public bool interrogative_upspeak { get; set; } - public bool synthesis_morphing { get; set; } - public bool sing { get; set; } - public bool manage_library { get; set; } + public bool adjust_mora_pitch; + public bool adjust_phoneme_length; + public bool adjust_speed_scale; + public bool adjust_pitch_scale; + public bool adjust_intonation_scale; + public bool adjust_volume_scale; + public bool interrogative_upspeak; + public bool synthesis_morphing; + public bool sing; + public bool manage_library; } - public string manifest_version { get; set; } - public string name { get; set; } - public string brand_name { get; set; } - public string uuid { get; set; } - public string url { get; set; } - public string icon { get; set; } - public int default_sampling_rate { get; set; } - public int frame_rate { get; set; } - public string terms_of_service { get; set; } - public IList update_infos { get; set; } - public IList dependency_licenses { get; set; } - public string supported_vvlib_manifest_version { get; set; } - public Supported_features supported_features { get; set; } + public string manifest_version; + public string name; + public string brand_name; + public string uuid; + public string url; + public string icon; + public int default_sampling_rate; + public int frame_rate; + public string terms_of_service; + public IList update_infos; + public IList dependency_licenses; + public string supported_vvlib_manifest_version; + public Supported_features supported_features; public void SaveLicenses(string location) { var parentDirectory = Directory.GetParent(location).ToString(); @@ -158,7 +158,7 @@ public void SaveLicenses(string location) { foreach (var item in dependency_licenses) { filePath = Path.Join(licenseDirectory, $"{item.name}_License.txt"); if (!string.IsNullOrEmpty(item.text)) { - File.WriteAllText(filePath, $"license:{item.license}\nversion:{item.version}\n\n" +item.text); + File.WriteAllText(filePath, $"license:{item.license}\nversion:{item.version}\n\n" + item.text); } } } From ab9842ec30dab055aff7a03021825e3523b999b6 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 31 Mar 2024 19:05:52 +0900 Subject: [PATCH 12/27] Fixed file not being newly created --- OpenUtau.Core/Voicevox/VoicevoxConfig.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index 60a83664a..a69976844 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -43,8 +43,8 @@ public static VoicevoxConfig Load(USinger singer) { } var manifest = jObj.ToObject(); manifest.SaveLicenses(singer.Location); - } catch { - Log.Error("Could not load Licenses."); + } catch(Exception e) { + Log.Error($"Could not load Licenses.:{e}"); } try { @@ -156,6 +156,7 @@ public void SaveLicenses(string location) { File.WriteAllText(filePath, terms_of_service); } foreach (var item in dependency_licenses) { + item.name = item.name.Replace("\"",""); filePath = Path.Join(licenseDirectory, $"{item.name}_License.txt"); if (!string.IsNullOrEmpty(item.text)) { File.WriteAllText(filePath, $"license:{item.license}\nversion:{item.version}\n\n" + item.text); From bee9706707d08b602e6f60794f6e176fa2ca21c5 Mon Sep 17 00:00:00 2001 From: General Nuisance <58832897+GeneralNuisance0@users.noreply.github.com> Date: Thu, 4 Apr 2024 00:04:04 -0400 Subject: [PATCH 13/27] Update EnglishVCCVPhonemizer.cs Added custom vowels "L", "W", and "Y" to account for voicebanks that contain support for syllabic Ls and Canadian Raising and use a custom dictionary. --- OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs index 3ad330f0d..04d219f16 100644 --- a/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -16,7 +16,7 @@ namespace OpenUtau.Plugin.Builtin { // Thanks to cubialpha, Cz, Halo/BagelHero, nago, and AnAndroNerd for their help. public class EnglishVCCVPhonemizer : SyllableBasedPhonemizer { - private readonly string[] vowels = "a,@,u,0,8,I,e,3,A,i,E,O,Q,6,o,1ng,9,&,x,1".Split(","); + private readonly string[] vowels = "a,@,u,0,8,I,e,3,A,i,E,O,Q,6,o,1ng,9,&,x,1,Y,L,W".Split(","); private readonly string[] consonants = "b,ch,d,dh,f,g,h,j,k,l,m,n,ng,p,r,s,sh,t,th,v,w,y,z,zh,dd,hh,sp,st".Split(","); private readonly Dictionary dictionaryReplacements = ("aa=a;ae=@;ah=u;ao=9;aw=8;ay=I;" + "b=b;ch=ch;d=d;dh=dh;eh=e;er=3;ey=A;f=f;g=g;hh=h;hhy=hh;ih=i;iy=E;jh=j;k=k;l=l;m=m;n=n;ng=ng;ow=O;oy=Q;" + From d05af19e6518fa33bda0259331d1049631b90df9 Mon Sep 17 00:00:00 2001 From: yqzhishen Date: Fri, 5 Apr 2024 17:17:09 +0800 Subject: [PATCH 14/27] Add continuous acceleration profiles --- OpenUtau.Core/DiffSinger/DiffSingerConfig.cs | 25 +++++++-- OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 16 +++++- .../DiffSinger/DiffSingerRenderer.cs | 56 +++++++++++++------ .../DiffSinger/DiffSingerVariance.cs | 16 +++++- OpenUtau.Core/Util/Preferences.cs | 4 +- OpenUtau/Strings/Strings.axaml | 2 +- OpenUtau/Strings/Strings.de-DE.axaml | 2 +- OpenUtau/Strings/Strings.es-ES.axaml | 2 +- OpenUtau/Strings/Strings.es-MX.axaml | 2 +- OpenUtau/Strings/Strings.fi-FI.axaml | 2 +- OpenUtau/Strings/Strings.fr-FR.axaml | 2 +- OpenUtau/Strings/Strings.id-ID.axaml | 2 +- OpenUtau/Strings/Strings.it-IT.axaml | 2 +- OpenUtau/Strings/Strings.ja-JP.axaml | 2 +- OpenUtau/Strings/Strings.ko-KR.axaml | 2 +- OpenUtau/Strings/Strings.nl-NL.axaml | 2 +- OpenUtau/Strings/Strings.pl-PL.axaml | 2 +- OpenUtau/Strings/Strings.pt-BR.axaml | 2 +- OpenUtau/Strings/Strings.ru-RU.axaml | 2 +- OpenUtau/Strings/Strings.th-TH.axaml | 2 +- OpenUtau/Strings/Strings.vi-VN.axaml | 2 +- OpenUtau/Strings/Strings.zh-CN.axaml | 2 +- OpenUtau/Strings/Strings.zh-TW.axaml | 2 +- OpenUtau/ViewModels/PreferencesViewModel.cs | 16 +++--- OpenUtau/Views/PreferencesDialog.axaml | 10 ++-- 25 files changed, 118 insertions(+), 61 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs b/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs index aef933d8b..4486250ba 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; +using YamlDotNet.Serialization; namespace OpenUtau.Core.DiffSinger { - [Serializable] public class RandomPitchShifting { public float[] range; @@ -27,8 +27,24 @@ public class DsConfig { public bool useVoicingEmbed = false; public bool useTensionEmbed = false; public AugmentationArgs augmentationArgs; - public bool useShallowDiffusion = false; - public int maxDepth = -1; + public bool useContinuousAcceleration = false; + [YamlMember(Alias = "use_shallow_diffusion")] public bool? _useShallowDiffusion; + [YamlMember(Alias = "use_variable_depth")] public bool? _useVariableDepth; + [YamlIgnore] + public bool useVariableDepth { + get { + // coalesce _useDepth and _useShallowDiffusion + if (_useVariableDepth.HasValue) { + return _useVariableDepth.Value; + } + if (_useShallowDiffusion.HasValue) { + return _useShallowDiffusion.Value; + } + return false; + } + } + [YamlMember(Alias = "max_depth")] public double _maxDepth; + [YamlIgnore] public double maxDepth => useContinuousAcceleration ? _maxDepth : _maxDepth / 1000.0; public string dur; public string linguistic; public string pitch; @@ -49,7 +65,8 @@ public class DsConfig { public double mel_fmax = 16000; public string mel_base = "10"; // or "e" public string mel_scale = "slaney"; // or "htk" - public float frameMs(){ + + public float frameMs() { return 1000f * hop_size / sample_rate; } } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 503c6cecb..bd4ed837b 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -209,7 +209,6 @@ public RenderPitchResult Process(RenderPhrase phrase){ note_dur[^1]=totalFrames-note_dur.Sum(); var pitch = Enumerable.Repeat(60f, totalFrames).ToArray(); var retake = Enumerable.Repeat(true, totalFrames).ToArray(); - var speedup = Preferences.Default.DiffsingerSpeedup; var pitchInputs = new List(); pitchInputs.Add(NamedOnnxValue.CreateFromTensor("encoder_out", encoder_out)); pitchInputs.Add(NamedOnnxValue.CreateFromTensor("note_midi", @@ -227,8 +226,19 @@ public RenderPitchResult Process(RenderPhrase phrase){ pitchInputs.Add(NamedOnnxValue.CreateFromTensor("retake", new DenseTensor(retake, new int[] { retake.Length }, false) .Reshape(new int[] { 1, retake.Length }))); - pitchInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", - new DenseTensor(new long[] { speedup }, new int[] { 1 },false))); + var steps = Preferences.Default.DiffSingerSteps; + if (dsConfig.useContinuousAcceleration) { + pitchInputs.Add(NamedOnnxValue.CreateFromTensor("steps", + new DenseTensor(new long[] { steps }, new int[] { 1 }, false))); + } else { + // find a largest integer speedup that are less than 1000 / steps and is a factor of 1000 + long speedup = Math.Max(1, 1000 / steps); + while (1000 % speedup != 0 && speedup > 1) { + speedup--; + } + pitchInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", + new DenseTensor(new long[] { speedup }, new int[] { 1 },false))); + } //expressiveness if (dsConfig.use_expr) { diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs index 168b5f844..f20cb0752 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs @@ -12,6 +12,7 @@ using OpenUtau.Core.Render; using OpenUtau.Core.SignalChain; using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; using Serilog; namespace OpenUtau.Core.DiffSinger { @@ -77,22 +78,22 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra var result = Layout(phrase); // calculate real depth - int speedup = Core.Util.Preferences.Default.DiffsingerSpeedup; var singer = (DiffSingerSinger) phrase.singer; - int depth = Core.Util.Preferences.Default.DiffSingerDepth; - if (singer.dsConfig.useShallowDiffusion) { - int kStep = singer.dsConfig.maxDepth; - if (kStep < 0) { + double depth = Preferences.Default.DiffSingerDepth; + int steps = Preferences.Default.DiffSingerSteps; + if (singer.dsConfig.useVariableDepth) { + double maxDepth = singer.dsConfig.maxDepth; + if (maxDepth < 0) { throw new InvalidDataException("Max depth is unset or is negative."); } - depth = Math.Min(depth, kStep); // make sure depth <= K_step - depth = depth / speedup * speedup; // make sure depth can be divided by speedup + depth = Math.Min(depth, maxDepth); // make sure depth <= K_step } - var wavName = singer.dsConfig.useShallowDiffusion - ? $"ds-{phrase.hash:x16}-depth{depth}-{speedup}x.wav" // if the depth changes, phrase should be re-rendered - : $"ds-{phrase.hash:x16}-{speedup}x.wav"; // preserve this for not invalidating cache from older versions + // format depth with 3 decimal places + var wavName = singer.dsConfig.useVariableDepth + ? $"ds-{phrase.hash:x16}-depth{depth:f2}-steps{steps}.wav" // if the depth changes, phrase should be re-rendered + : $"ds-{phrase.hash:x16}-steps{steps}.wav"; // preserve this for models without depth var wavPath = Path.Join(PathManager.Inst.CachePath, wavName); - string progressInfo = $"Track {trackNo + 1}: {this}{speedup}x \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\""; + string progressInfo = $"Track {trackNo + 1}: {this} depth={depth:f2} steps={steps} \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\""; if (File.Exists(wavPath)) { try { using (var waveStream = Wave.OpenFile(wavPath)) { @@ -103,7 +104,7 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } } if (result.samples == null) { - result.samples = InvokeDiffsinger(phrase, depth, speedup, cancellation); + result.samples = InvokeDiffsinger(phrase, depth, steps, cancellation); if (result.samples != null) { var source = new WaveSource(0, 0, 0, 1); source.SetSamples(result.samples); @@ -124,7 +125,7 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra leadingMs、positionMs、estimatedLengthMs: timeaxis layout in Ms, double */ - float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, CancellationTokenSource cancellation) { + float[] InvokeDiffsinger(RenderPhrase phrase, double depth, int steps, CancellationTokenSource cancellation) { var singer = phrase.singer as DiffSingerSinger; //Check if dsconfig.yaml is correct if(String.IsNullOrEmpty(singer.dsConfig.vocoder) || @@ -233,12 +234,31 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati acousticInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor)); // sampling acceleration related - if (singer.dsConfig.useShallowDiffusion) { - acousticInputs.Add(NamedOnnxValue.CreateFromTensor("depth", - new DenseTensor(new long[] { depth }, new int[] { 1 }, false))); + if (singer.dsConfig.useContinuousAcceleration) { + if (singer.dsConfig.useVariableDepth) { + acousticInputs.Add(NamedOnnxValue.CreateFromTensor("depth", + new DenseTensor(new float[] {(float)depth}, new int[] { 1 }, false))); + } + acousticInputs.Add(NamedOnnxValue.CreateFromTensor("steps", + new DenseTensor(new long[] { steps }, new int[] { 1 }, false))); + } else { + long speedup; + if (singer.dsConfig.useVariableDepth) { + long int64Depth = (long) Math.Round(depth * 1000); + speedup = Math.Max(1, int64Depth / steps); + int64Depth = int64Depth / speedup * speedup; // make sure depth can be divided by speedup + acousticInputs.Add(NamedOnnxValue.CreateFromTensor("depth", + new DenseTensor(new long[] { int64Depth }, new int[] { 1 }, false))); + } else { + // find a largest integer speedup that are less than 1000 / steps and is a factor of 1000 + speedup = Math.Max(1, 1000 / steps); + while (1000 % speedup != 0 && speedup > 1) { + speedup--; + } + } + acousticInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", + new DenseTensor(new long[] { speedup }, new int[] { 1 }, false))); } - acousticInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", - new DenseTensor(new long[] { speedup }, new int[] { 1 },false))); //speaker if(singer.dsConfig.speakers != null) { diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs index 017d04ea2..dd8033491 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs @@ -129,7 +129,6 @@ public VarianceResult Process(RenderPhrase phrase){ var pitch = DiffSingerUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, totalFrames, headFrames, tailFrames, x => x * 0.01) .Select(f => (float)f).ToArray(); - var speedup = Preferences.Default.DiffsingerSpeedup; var varianceInputs = new List(); varianceInputs.Add(NamedOnnxValue.CreateFromTensor("encoder_out", encoder_out)); @@ -174,8 +173,19 @@ public VarianceResult Process(RenderPhrase phrase){ varianceInputs.Add(NamedOnnxValue.CreateFromTensor("retake", new DenseTensor(retake, new int[] { retake.Length }, false) .Reshape(new int[] { 1, totalFrames, numVariances }))); - varianceInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", - new DenseTensor(new long[] { speedup }, new int[] { 1 },false))); + var steps = Preferences.Default.DiffSingerSteps; + if (dsConfig.useContinuousAcceleration) { + varianceInputs.Add(NamedOnnxValue.CreateFromTensor("steps", + new DenseTensor(new long[] { steps }, new int[] { 1 }, false))); + } else { + // find a largest integer speedup that are less than 1000 / steps and is a factor of 1000 + long speedup = Math.Max(1, 1000 / steps); + while (1000 % speedup != 0 && speedup > 1) { + speedup--; + } + varianceInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", + new DenseTensor(new long[] { speedup }, new int[] { 1 },false))); + } //Speaker if(dsConfig.speakers != null) { var speakerEmbedManager = getSpeakerEmbedManager(); diff --git a/OpenUtau.Core/Util/Preferences.cs b/OpenUtau.Core/Util/Preferences.cs index 1baedf258..48900d712 100644 --- a/OpenUtau.Core/Util/Preferences.cs +++ b/OpenUtau.Core/Util/Preferences.cs @@ -142,8 +142,8 @@ public class SerializablePreferences { public int WorldlineR = 0; public string OnnxRunner = string.Empty; public int OnnxGpu = 0; - public int DiffsingerSpeedup = 50; - public int DiffSingerDepth = 1000; + public double DiffSingerDepth = 1.0; + public int DiffSingerSteps = 20; public bool SkipRenderingMutedTracks = false; public string Language = string.Empty; public string? SortingOrder = null; diff --git a/OpenUtau/Strings/Strings.axaml b/OpenUtau/Strings/Strings.axaml index cccdafb0b..3e862a818 100644 --- a/OpenUtau/Strings/Strings.axaml +++ b/OpenUtau/Strings/Strings.axaml @@ -368,7 +368,7 @@ Warning: this option removes custom presets. Rendering Default renderer (for classic voicebanks) DiffSinger Render Depth - DiffSinger Render Speedup + DiffSinger Render Steps GPU Machine Learning Runner Phase Compensation diff --git a/OpenUtau/Strings/Strings.de-DE.axaml b/OpenUtau/Strings/Strings.de-DE.axaml index 85f61d3a2..c11a8f61f 100644 --- a/OpenUtau/Strings/Strings.de-DE.axaml +++ b/OpenUtau/Strings/Strings.de-DE.axaml @@ -364,7 +364,7 @@ Warnung: Diese Option entfernt alle benutzerdefinierten Einstellungen.Rendering--> - + Machinelles Lernen Runner Phasenkompensation diff --git a/OpenUtau/Strings/Strings.es-ES.axaml b/OpenUtau/Strings/Strings.es-ES.axaml index d3f5867a3..9e5875cea 100644 --- a/OpenUtau/Strings/Strings.es-ES.axaml +++ b/OpenUtau/Strings/Strings.es-ES.axaml @@ -364,7 +364,7 @@ Warning: this option removes custom presets.--> - + diff --git a/OpenUtau/Strings/Strings.es-MX.axaml b/OpenUtau/Strings/Strings.es-MX.axaml index 96d567f06..b520f05f2 100644 --- a/OpenUtau/Strings/Strings.es-MX.axaml +++ b/OpenUtau/Strings/Strings.es-MX.axaml @@ -360,7 +360,7 @@ Advertencia: Esta opción eliminará las bases personalizadas. Renderización - + Compensación de fase diff --git a/OpenUtau/Strings/Strings.fi-FI.axaml b/OpenUtau/Strings/Strings.fi-FI.axaml index 0349966a4..83709f879 100644 --- a/OpenUtau/Strings/Strings.fi-FI.axaml +++ b/OpenUtau/Strings/Strings.fi-FI.axaml @@ -364,7 +364,7 @@ Warning: this option removes custom presets.--> Renderöi - + diff --git a/OpenUtau/Strings/Strings.fr-FR.axaml b/OpenUtau/Strings/Strings.fr-FR.axaml index e3b2fce90..14f161d24 100644 --- a/OpenUtau/Strings/Strings.fr-FR.axaml +++ b/OpenUtau/Strings/Strings.fr-FR.axaml @@ -360,7 +360,7 @@ Attention: cela va effacer le préréglage. En train de rendre... - + Compensation de phase diff --git a/OpenUtau/Strings/Strings.id-ID.axaml b/OpenUtau/Strings/Strings.id-ID.axaml index e8dd72b25..0340e8b7e 100644 --- a/OpenUtau/Strings/Strings.id-ID.axaml +++ b/OpenUtau/Strings/Strings.id-ID.axaml @@ -366,7 +366,7 @@ Peringatan: opsi ini menghapus prasetel khusus. Merender Perender bawaan (untuk voicebank klasik) Kedalaman Render DiffSinger - Kecepatan Render DiffSinger + DiffSinger Render Steps Penjalan Pembelajaran Mesin Kompensasi Fase diff --git a/OpenUtau/Strings/Strings.it-IT.axaml b/OpenUtau/Strings/Strings.it-IT.axaml index cb7dc91ff..5bfcabee2 100644 --- a/OpenUtau/Strings/Strings.it-IT.axaml +++ b/OpenUtau/Strings/Strings.it-IT.axaml @@ -364,7 +364,7 @@ Tieni premuto Ctrl per selezionare Renderizzazione - + Compensazione di Fase diff --git a/OpenUtau/Strings/Strings.ja-JP.axaml b/OpenUtau/Strings/Strings.ja-JP.axaml index 8e66086f4..4152f8c0d 100644 --- a/OpenUtau/Strings/Strings.ja-JP.axaml +++ b/OpenUtau/Strings/Strings.ja-JP.axaml @@ -368,7 +368,7 @@ レンダリング Classic UTAU音源のデフォルトレンダラー - + 位相補正 diff --git a/OpenUtau/Strings/Strings.ko-KR.axaml b/OpenUtau/Strings/Strings.ko-KR.axaml index 96c6d7455..415a73cfd 100644 --- a/OpenUtau/Strings/Strings.ko-KR.axaml +++ b/OpenUtau/Strings/Strings.ko-KR.axaml @@ -364,7 +364,7 @@ 렌더링 기본 렌더러 (Classic 음원용) DiffSinger 렌더링 깊이(Depth) - DiffSinger 렌더링 가속 + DiffSinger Render Steps 그래픽 카드(GPU) 머신러닝 실행 장치 위상 보정 diff --git a/OpenUtau/Strings/Strings.nl-NL.axaml b/OpenUtau/Strings/Strings.nl-NL.axaml index 2cbc16916..3e0f2bf4a 100644 --- a/OpenUtau/Strings/Strings.nl-NL.axaml +++ b/OpenUtau/Strings/Strings.nl-NL.axaml @@ -360,7 +360,7 @@ Ctrl ingedrukt houden om te selecteren Renderen - + Fase Compensatie diff --git a/OpenUtau/Strings/Strings.pl-PL.axaml b/OpenUtau/Strings/Strings.pl-PL.axaml index aa6de5671..491d46682 100644 --- a/OpenUtau/Strings/Strings.pl-PL.axaml +++ b/OpenUtau/Strings/Strings.pl-PL.axaml @@ -364,7 +364,7 @@ Warning: this option removes custom presets.--> Renderowanie - + diff --git a/OpenUtau/Strings/Strings.pt-BR.axaml b/OpenUtau/Strings/Strings.pt-BR.axaml index fa7408a7c..7b2652e64 100644 --- a/OpenUtau/Strings/Strings.pt-BR.axaml +++ b/OpenUtau/Strings/Strings.pt-BR.axaml @@ -360,7 +360,7 @@ Segure Ctrl para selecionar Renderização - + Compensação de Fase diff --git a/OpenUtau/Strings/Strings.ru-RU.axaml b/OpenUtau/Strings/Strings.ru-RU.axaml index fe534d629..77981f1d7 100644 --- a/OpenUtau/Strings/Strings.ru-RU.axaml +++ b/OpenUtau/Strings/Strings.ru-RU.axaml @@ -360,7 +360,7 @@ Рендеринг Рендерер по-умолчанию (для классических войсбанков) Глубина рендеринга DiffSinger - Ускорение рендеринга DiffSinger + DiffSinger Render Steps Графический процессор Фазовая компенсация diff --git a/OpenUtau/Strings/Strings.th-TH.axaml b/OpenUtau/Strings/Strings.th-TH.axaml index c49237cab..61d9a5b19 100644 --- a/OpenUtau/Strings/Strings.th-TH.axaml +++ b/OpenUtau/Strings/Strings.th-TH.axaml @@ -360,7 +360,7 @@ กำลังประมวลผล - + การชดเชย Phase diff --git a/OpenUtau/Strings/Strings.vi-VN.axaml b/OpenUtau/Strings/Strings.vi-VN.axaml index af00c53f1..990324935 100644 --- a/OpenUtau/Strings/Strings.vi-VN.axaml +++ b/OpenUtau/Strings/Strings.vi-VN.axaml @@ -360,7 +360,7 @@ Nhấn giữ Ctrl để chọn nhiều nốt Render - + diff --git a/OpenUtau/Strings/Strings.zh-CN.axaml b/OpenUtau/Strings/Strings.zh-CN.axaml index b271873e8..fcbb33fc8 100644 --- a/OpenUtau/Strings/Strings.zh-CN.axaml +++ b/OpenUtau/Strings/Strings.zh-CN.axaml @@ -360,7 +360,7 @@ 渲染 传统音源的默认渲染器 DiffSinger 渲染深度 - DiffSinger 渲染加速 + DiffSinger 渲染步数 机器学习运行器 相位修正 diff --git a/OpenUtau/Strings/Strings.zh-TW.axaml b/OpenUtau/Strings/Strings.zh-TW.axaml index 5b7497e7f..d27cc8893 100644 --- a/OpenUtau/Strings/Strings.zh-TW.axaml +++ b/OpenUtau/Strings/Strings.zh-TW.axaml @@ -360,7 +360,7 @@ 算繪 - + 相位補償 diff --git a/OpenUtau/ViewModels/PreferencesViewModel.cs b/OpenUtau/ViewModels/PreferencesViewModel.cs index e82af39f1..ff3ecd52b 100644 --- a/OpenUtau/ViewModels/PreferencesViewModel.cs +++ b/OpenUtau/ViewModels/PreferencesViewModel.cs @@ -39,9 +39,9 @@ public AudioOutputDevice? AudioOutputDevice { [Reactive] public string OnnxRunner { get; set; } public List OnnxGpuOptions { get; set; } [Reactive] public GpuInfo OnnxGpu { get; set; } - public List DiffsingerSpeedupOptions { get; } = new List { 1, 5, 10, 20, 50, 100 }; - [Reactive] public int DiffSingerDepth { get; set; } - [Reactive] public int DiffsingerSpeedup { get; set; } + public List DiffSingerStepsOptions { get; } = new List { 2, 5, 10, 20, 50, 100, 200, 500, 1000 }; + [Reactive] public double DiffSingerDepth { get; set; } + [Reactive] public int DiffSingerSteps { get; set; } [Reactive] public bool SkipRenderingMutedTracks { get; set; } [Reactive] public bool HighThreads { get; set; } [Reactive] public int Theme { get; set; } @@ -139,8 +139,8 @@ public PreferencesViewModel() { OnnxRunnerOptions[0] : Preferences.Default.OnnxRunner; OnnxGpuOptions = Onnx.getGpuInfo(); OnnxGpu = OnnxGpuOptions.FirstOrDefault(x => x.deviceId == Preferences.Default.OnnxGpu, OnnxGpuOptions[0]); - DiffSingerDepth = Preferences.Default.DiffSingerDepth; - DiffsingerSpeedup = Preferences.Default.DiffsingerSpeedup; + DiffSingerDepth = Preferences.Default.DiffSingerDepth * 100; + DiffSingerSteps = Preferences.Default.DiffSingerSteps; SkipRenderingMutedTracks = Preferences.Default.SkipRenderingMutedTracks; Theme = Preferences.Default.Theme; PenPlusDefault = Preferences.Default.PenPlusDefault; @@ -325,14 +325,14 @@ public PreferencesViewModel() { Preferences.Default.ClearCacheOnQuit = index; Preferences.Save(); }); - this.WhenAnyValue(vm => vm.DiffsingerSpeedup) + this.WhenAnyValue(vm => vm.DiffSingerSteps) .Subscribe(index => { - Preferences.Default.DiffsingerSpeedup = index; + Preferences.Default.DiffSingerSteps = index; Preferences.Save(); }); this.WhenAnyValue(vm => vm.DiffSingerDepth) .Subscribe(index => { - Preferences.Default.DiffSingerDepth = index; + Preferences.Default.DiffSingerDepth = index / 100; Preferences.Save(); }); this.WhenAnyValue(vm => vm.SkipRenderingMutedTracks) diff --git a/OpenUtau/Views/PreferencesDialog.axaml b/OpenUtau/Views/PreferencesDialog.axaml index 131169b97..ca6741870 100644 --- a/OpenUtau/Views/PreferencesDialog.axaml +++ b/OpenUtau/Views/PreferencesDialog.axaml @@ -130,19 +130,19 @@ - - + + - + - + From 3f3f6a512a625ac66621a938d08df98db81c1fc3 Mon Sep 17 00:00:00 2001 From: cadlaxa Date: Sun, 7 Apr 2024 08:48:13 +0800 Subject: [PATCH 15/27] `[c c]` fallback fixes and reduced unnecessary codes --- .../ArpasingPlusPhonemizer.cs | 182 ++---------------- 1 file changed, 19 insertions(+), 163 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs index acd12013a..bb7e5c282 100644 --- a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs @@ -32,7 +32,7 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer { private readonly string[] tapConsonant = "dx,nx,lx".Split(","); private readonly string[] semilongConsonants = "ng,n,m,v,z,q,hh".Split(","); private readonly string[] semiVowels = "y,w".Split(","); - private readonly string[] connectingGlides = "l,r".Split(","); + private readonly string[] connectingGlides = "l,r,ll".Split(","); private readonly string[] longConsonants = "f,s,sh,th,zh,dr,tr,ts,c".Split(","); private readonly string[] normalConsonants = "b,d,dh,g,k,p,t,l,r".Split(','); private readonly string[] connectingNormCons = "b,d,g,k,p,t".Split(','); @@ -105,8 +105,8 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer { {"oy","ao"}, }; + private readonly string[] ccvException = { "ch", "dh", "dx", "fh", "gh", "hh", "jh", "kh", "ph", "ng", "sh", "th", "vh", "wh", "zh" }; - private readonly string[] vc_cAcception = { "r", "l" }; private readonly string[] RomajiException = { "a", "e", "i", "o", "u" }; private string[] tails = "-,R,RB".Split(','); @@ -288,7 +288,7 @@ protected override List ProcessSyllable(Syllable syllable) { } } } - // EXTEND AS [V] + // EXTEND AS [V] } else if (HasOto($"{v}", syllable.vowelTone) || missingVphonemes.ContainsKey(prevV)) { basePhoneme = v; } else if (!HasOto(v, syllable.vowelTone) && vvDiphthongExceptions.ContainsKey(prevV)) { @@ -354,7 +354,8 @@ protected override List ProcessSyllable(Syllable syllable) { if (TryAddPhoneme(phonemes, syllable.tone, $"- {string.Join("", cc.Take(i))}", $"-{string.Join("", cc.Take(i))}", ValidateAlias($"- {string.Join("", cc.Take(i))}"), ValidateAlias($"-{string.Join("", cc.Take(i))}"))) { firstC = i - 1; } - } break; + } + break; } // [- C] if (phonemes.Count == 0) { @@ -384,7 +385,7 @@ protected override List ProcessSyllable(Syllable syllable) { } else if (syllable.CurrentWordCc.Length == 1 && syllable.PreviousWordCc.Length == 1) { basePhoneme = crv; } - } + } // try [V C], [V CC], [VC C], [V -][- C] for (var i = lastC + 1; i >= 0; i--) { var vr = $"{prevV} -"; @@ -406,7 +407,11 @@ protected override List ProcessSyllable(Syllable syllable) { phonemes.Add(vcc); firstC = 1; break; - } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) { + } else if ((HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone))) { + phonemes.Add(vcc); + firstC = 1; + break; + } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone) ) { phonemes.Add(vc); break; } else { @@ -816,7 +821,6 @@ protected override string ValidateAlias(string alias) { var CVMappings = new Dictionary { { "ao", new[] { "ow" } }, - { "ax", new[] { "ah" } }, { "oy", new[] { "ow" } }, { "aw", new[] { "ah" } }, { "ay", new[] { "ah" } }, @@ -935,18 +939,12 @@ protected override string ValidateAlias(string alias) { if (alias == "aa b") { return alias.Replace("aa b", "aa d"); } - if (alias == "aa dr") { - return alias.Replace("aa dr", "aa d"); - } if (alias == "aa dx") { return alias.Replace("aa dx", "aa d"); } if (alias == "aa q") { return alias.Replace("aa q", "aa t"); } - if (alias == "aa tr") { - return alias.Replace("aa tr", "aa t"); - } if (alias == "aa y") { return alias.Replace("aa y", "ah iy"); } @@ -958,18 +956,12 @@ protected override string ValidateAlias(string alias) { if (alias == "ae b") { return alias.Replace("ae b", "ah d"); } - if (alias == "ae dr") { - return alias.Replace("ae dr", "ah d"); - } if (alias == "ae dx") { return alias.Replace("ae dx", "ah d"); } if (alias == "ae q") { return alias.Replace("ae q", "ah t"); } - if (alias == "ae tr") { - return alias.Replace("ae tr", "ah t"); - } if (alias == "ae y") { return alias.Replace("ae y", "ah iy"); } @@ -981,18 +973,12 @@ protected override string ValidateAlias(string alias) { if (alias == "ah b") { return alias.Replace("ah b", "ah d"); } - if (alias == "ah dr") { - return alias.Replace("ah dr", "ah d"); - } if (alias == "ah dx") { return alias.Replace("ah dx", "ah d"); } if (alias == "ah q") { return alias.Replace("ah q", "ah t"); } - if (alias == "ah tr") { - return alias.Replace("ah tr", "ah t"); - } if (alias == "ah y") { return alias.Replace("ah y", "ah iy"); } @@ -1005,18 +991,12 @@ protected override string ValidateAlias(string alias) { if (alias == "ao b") { return alias.Replace("ao b", "ah d"); } - if (alias == "ao dr") { - return alias.Replace("ao dr", "ah d"); - } if (alias == "ao dx") { return alias.Replace("ao dx", "ah d"); } if (alias == "ao q") { return alias.Replace("ao q", "ao t"); } - if (alias == "ao tr") { - return alias.Replace("ao tr", "ao t"); - } if (alias == "ao y") { return alias.Replace("ao y", "ow y"); } @@ -1029,18 +1009,12 @@ protected override string ValidateAlias(string alias) { if (alias == "ax b") { return alias.Replace("ax b", "ah d"); } - if (alias == "ax dr") { - return alias.Replace("ax dr", "ah d"); - } if (alias == "ax dx") { return alias.Replace("ax dx", "ah d"); } if (alias == "ax q") { return alias.Replace("ax q", "ah t"); } - if (alias == "ax tr") { - return alias.Replace("ax tr", "ah t"); - } if (alias == "ax y") { return alias.Replace("ax y", "ah iy"); } @@ -1071,9 +1045,6 @@ protected override string ValidateAlias(string alias) { if (alias == "eh y") { return alias.Replace("eh y", "ey"); } - if (alias == "eh tr") { - return alias.Replace("eh tr", "eh t"); - } if (alias == "eh zh") { return alias.Replace("eh zh", "eh s"); } @@ -1082,9 +1053,6 @@ protected override string ValidateAlias(string alias) { if (alias == "er ch") { return alias.Replace("er ch", "er t"); } - if (alias == "er dr") { - return alias.Replace("er dr", "er d"); - } if (alias == "er dx") { return alias.Replace("er dx", "er d"); } @@ -1103,9 +1071,6 @@ protected override string ValidateAlias(string alias) { if (alias == "er sh") { return alias.Replace("er sh", "er s"); } - if (alias == "eh tr") { - return alias.Replace("eh tr", "eh t"); - } if (alias == "er zh") { return alias.Replace("er zh", "er z"); } @@ -1114,9 +1079,6 @@ protected override string ValidateAlias(string alias) { if (alias == "ih b") { return alias.Replace("ih b", "ih d"); } - if (alias == "ih dr") { - return alias.Replace("ih dr", "ih d"); - } if (alias == "ih dx") { return alias.Replace("ih dx", "ih d"); } @@ -1126,9 +1088,6 @@ protected override string ValidateAlias(string alias) { if (alias == "ih q") { return alias.Replace("ih q", "ih t"); } - if (alias == "ih tr") { - return alias.Replace("ih tr", "ih t"); - } if (alias == "ih w") { return alias.Replace("ih w", "iy w"); } @@ -1140,9 +1099,6 @@ protected override string ValidateAlias(string alias) { } //VC (iy specific) - if (alias == "iy dr") { - return alias.Replace("iy dr", "iy d"); - } if (alias == "iy dx") { return alias.Replace("iy dx", "iy d"); } @@ -1170,9 +1126,6 @@ protected override string ValidateAlias(string alias) { if (alias == "uh ch") { return alias.Replace("uh ch", "uh t"); } - if (alias == "uh dr") { - return alias.Replace("uh dr", "uh d"); - } if (alias == "uh dx") { return alias.Replace("uh dx", "uh d"); } @@ -1182,9 +1135,6 @@ protected override string ValidateAlias(string alias) { if (alias == "uh q") { return alias.Replace("uh q", "uh t"); } - if (alias == "uh tr") { - return alias.Replace("uh tr", "uh t"); - } if (alias == "uh zh") { return alias.Replace("uh zh", "uw z"); } @@ -1193,9 +1143,6 @@ protected override string ValidateAlias(string alias) { if (alias == "uw ch") { return alias.Replace("uw ch", "uw t"); } - if (alias == "uw dr") { - return alias.Replace("uw dr", "uw d"); - } if (alias == "uw dx") { return alias.Replace("uw dx", "uw d"); } @@ -1208,9 +1155,6 @@ protected override string ValidateAlias(string alias) { if (alias == "uw q") { return alias.Replace("uw q", "uw t"); } - if (alias == "uw tr") { - return alias.Replace("uw tr", "uw t"); - } if (alias == "uw zh") { return alias.Replace("uw zh", "uw sh"); } @@ -1286,9 +1230,6 @@ protected override string ValidateAlias(string alias) { if (alias == "f sh") { return alias.Replace("sh", "s"); } - if (alias == "f w") { - return alias.Replace("f w", "f uw"); - } if (alias == "f z") { return alias.Replace("z", "s"); } @@ -1314,9 +1255,6 @@ protected override string ValidateAlias(string alias) { } //CC (hh specific) - if (alias == "hh w") { - return alias.Replace("hh w", "hh uw"); - } if (alias == "hh y") { return alias.Replace("hh", "f"); } @@ -1373,9 +1311,6 @@ protected override string ValidateAlias(string alias) { if (alias == "m ch") { return alias.Replace("m", "n"); } - if (alias == "m hh") { - return alias.Replace("m hh", "hh"); - } if (alias == "m jh") { return alias.Replace("jh", "d"); } @@ -1457,9 +1392,6 @@ protected override string ValidateAlias(string alias) { if (alias == "r ch") { return alias.Replace("ch", "t"); } - if (alias == "r dr") { - return alias.Replace("dr", "jh"); - } if (alias == "r dx") { return alias.Replace("dx", "d"); } @@ -1474,9 +1406,6 @@ protected override string ValidateAlias(string alias) { } //CC (s specific) - if (alias == "s dr") { - return alias.Replace("dr", "jh"); - } if (alias == "s ch") { return alias.Replace("ch", "t"); } @@ -1500,12 +1429,6 @@ protected override string ValidateAlias(string alias) { } //CC (sh specific) - if (alias == "sh f") { - return alias.Replace("sh", "s"); - } - if (alias == "sh hh") { - return alias.Replace("sh", "s"); - } if (alias == "sh l") { return alias.Replace("sh", "s"); } @@ -1515,9 +1438,6 @@ protected override string ValidateAlias(string alias) { if (alias == "sh n") { return alias.Replace("sh", "s"); } - if (alias == "sh ng") { - return alias.Replace("sh ng", "s n"); - } if (alias == "sh r") { return alias.Replace("sh", "s"); } @@ -1527,17 +1447,8 @@ protected override string ValidateAlias(string alias) { if (alias == "sh sh") { return alias.Replace("sh sh", "s s"); } - if (alias == "sh w") { - return alias.Replace("sh w", "sh uw"); - } - if (alias == "sh y") { - return alias.Replace("sh y", "sh iy"); - } //CC (t specific) - if (alias == "t y") { - return alias.Replace("y", "iy"); - } if (alias == "t z") { return alias.Replace("t", "g"); } @@ -1557,36 +1468,12 @@ protected override string ValidateAlias(string alias) { if (alias == "v dh") { return alias.Replace("dh", "d"); } - if (alias == "v f") { - return alias.Replace("v", "s"); - } - if (alias == "v hh") { - return alias.Replace("v", "s"); - } - if (alias == "v l") { - return alias.Replace("v", "s"); - } - if (alias == "v m") { - return alias.Replace("v", "s"); - } - if (alias == "v n") { - return alias.Replace("v", "s"); - } - if (alias == "v ng") { - return alias.Replace("v ng", "s n"); - } - if (alias == "v r") { - return alias.Replace("v", "s"); - } if (alias == "v th") { return alias.Replace("v th", "th"); } if (alias == "v s") { return alias.Replace("v", "s"); } - if (alias == "v sh") { - return alias.Replace("v sh", "s s"); - } if (alias == "v z") { return alias.Replace("v z", "s s"); } @@ -1626,15 +1513,9 @@ protected override string ValidateAlias(string alias) { if (alias == "z ch") { return alias.Replace("ch", "t"); } - if (alias == "z dr") { - return alias.Replace("dr", "jh"); - } if (alias == "z dx") { return alias.Replace("dx", "d"); } - if (alias == "z tr") { - return alias.Replace("tr", "t"); - } if (alias == "z ng") { return alias.Replace("ng", "n"); } @@ -1649,15 +1530,9 @@ protected override string ValidateAlias(string alias) { if (alias == "zh ch") { return alias.Replace("ch", "t"); } - if (alias == "zh dr") { - return alias.Replace("dr", "jh"); - } if (alias == "zh dx") { return alias.Replace("dx", "d"); } - if (alias == "zh tr") { - return alias.Replace("tr", "t"); - } if (alias == "zh ng") { return alias.Replace("ng", "n"); } @@ -1792,7 +1667,6 @@ protected override string ValidateAlias(string alias) { } } return base.ValidateAlias(alias); - } protected override double GetTransitionBasicLengthMs(string alias = "") { @@ -1802,7 +1676,6 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { bool isEndingVowel = false; bool hasCons = false; bool haslr = false; - bool hasSuffix = false; var excludedVowels = new List { "a", "e", "i", "o", "u" }; var GlideVCCons = new List { $"{excludedVowels} {connectingGlides}" }; var NormVCCons = new List { $"{excludedVowels} {connectingNormCons}" }; @@ -1810,13 +1683,12 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { var excludedEndings = new List { $"{arpabetFirstVDiphthong}y -", $"{arpabetFirstVDiphthong}w -", $"{arpabetFirstVDiphthong}r -", }; var numbers = new List { "1", "2", "3", "4", "5", "6", "7", "8", "9" }; - foreach (var c in longConsonants) { if (alias.Contains(c) && !alias.StartsWith(c) && !alias.Contains("ng -")) { return base.GetTransitionBasicLengthMs() * 2.5; } } - + foreach (var c in normalConsonants) { foreach (var v in normalConsonants.Except(GlideVCCons)) { foreach (var b in normalConsonants.Except(NormVCCons)) { @@ -1839,7 +1711,7 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { if (alias.Contains(c) && !alias.Contains("- ") && alias.Contains($"{v} {c}") && !alias.Contains("dx")) { return base.GetTransitionBasicLengthMs() * 2.0; - } + } } } @@ -1852,7 +1724,7 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { } foreach (var c in affricates) { - if (alias.Contains(c) && !alias.StartsWith(c) && !alias.Contains($"- ch") && !alias.Contains($"- jh")) { + if (alias.Contains(c) && !alias.StartsWith(c)) { return base.GetTransitionBasicLengthMs() * 1.5; } } @@ -1861,7 +1733,6 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { foreach (var v in vowels.Except(excludedVowels)) { if (alias.Contains($"{v} {c}") && !alias.Contains($"{c} -") && !alias.Contains($"{v} -")) { return base.GetTransitionBasicLengthMs() * 2.5; - } } } @@ -1882,6 +1753,7 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { } } } + foreach (var c in semiVowels) { foreach (var v in semilongConsonants.Except(excludedEndings)) { if (alias.Contains(c) && !alias.StartsWith(c) && !alias.Contains($"{c} -")) { @@ -1898,40 +1770,24 @@ protected override double GetTransitionBasicLengthMs(string alias = "") { // Check if the alias ends with a consonant or vowel foreach (var c in consonants) { - if (alias.Contains(c) && alias.Contains('-') && alias.StartsWith(c)) { + if (alias.Contains(c) && alias.Contains('-') && alias.Contains($"{c} -")) { isEndingConsonant = true; break; } } foreach (var v in vowels) { - if (alias.Contains(v) && alias.Contains('-') && alias.StartsWith(v)) { + if (alias.Contains(v) && alias.Contains('-') && alias.Contains($"{v} -")) { isEndingVowel = true; break; } } - - // Check for tone suffix - foreach (var tone in vowels) { - if (alias.EndsWith(tone)) { - hasSuffix = true; - - break; - } - } - foreach (var tone in consonants) { - if (alias.EndsWith(tone)) { - hasSuffix = true; - - break; - } - } + // If the alias ends with a consonant or vowel, return 0.5 ms - if (isEndingConsonant || isEndingVowel || hasSuffix) { + if (isEndingConsonant || isEndingVowel) { return base.GetTransitionBasicLengthMs() * 0.5; } - return base.GetTransitionBasicLengthMs() * transitionMultiplier; } } From 634b0f1dc01a941d071cd8d5b593704df146b60b Mon Sep 17 00:00:00 2001 From: cadlaxa Date: Sun, 7 Apr 2024 18:19:26 +0800 Subject: [PATCH 16/27] Fixes to `[vc c]` and `[v c]` fallbacks --- .../ArpasingPlusPhonemizer.cs | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs index bb7e5c282..f4e74d504 100644 --- a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs @@ -71,6 +71,23 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer { .ToDictionary(parts => parts[0], parts => parts[1]); private bool isTimitPhonemes = false; + private bool vc_FallBack = false; + + private readonly Dictionary vcFallBacks = + new Dictionary() { + {"aw","uw"}, + {"ow","uw"}, + {"uh","uw"}, + {"ay","iy"}, + {"ey","iy"}, + {"oy","iy"}, + {"aa","ah"}, + {"ae","ah"}, + {"ao","ah"}, + {"eh","ah"}, + {"er","ah"}, + }; + private readonly Dictionary vvExceptions = new Dictionary() { {"aw","w"}, @@ -248,6 +265,11 @@ protected override List ProcessSyllable(Syllable syllable) { } } + // For VC Fallback phonemes + if (!HasOto($"{vcFallBacks.Keys} {cc}", syllable.tone)) { + vc_FallBack = true; + } + // STARTING V if (syllable.IsStartingV) { // TRIES - V THEN V @@ -327,7 +349,6 @@ protected override List ProcessSyllable(Syllable syllable) { var rccv3 = $"-{string.Join("", cc)}{v}"; var crv = $"{cc.Last()} {v}"; var ccv = $"{string.Join("", cc)} {v}"; - var cv = $"{cc[0]}{v}"; if (HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone) && !ccvException.Contains(cc[0])) { basePhoneme = rccv; lastC = 0; @@ -407,8 +428,8 @@ protected override List ProcessSyllable(Syllable syllable) { phonemes.Add(vcc); firstC = 1; break; - } else if ((HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone))) { - phonemes.Add(vcc); + } else if (HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) { + phonemes.Add(vc_c); firstC = 1; break; } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone) ) { @@ -1548,28 +1569,14 @@ protected override string ValidateAlias(string alias) { } //VC's - foreach (var v1 in new[] { "aw", "ow", "uh" }) { - foreach (var c1 in consonants) { - if (vcSpecific || !alias.Contains($"{v1} {c1}")) { - alias = alias.Replace(v1 + " " + c1, "uw" + " " + c1); - } - } - } - foreach (var v1 in new[] { "ay", "ey", "oy" }) { + foreach (var v1 in vcFallBacks) { foreach (var c1 in consonants) { - if (vcSpecific || !alias.Contains($"{v1} {c1}")) { - alias = alias.Replace(v1 + " " + c1, "iy" + " " + c1); + if (vcSpecific && vc_FallBack) { + alias = alias.Replace(v1.Key + " " + c1, v1.Value + " " + c1); } } } - foreach (var v1 in new[] { "aa", "ae", "ao", "eh", "er" }) { - foreach (var c1 in consonants) { - if (vcSpecific || !alias.Contains($"{v1} {c1}")) { - alias = alias.Replace(v1 + " " + c1, "ah" + " " + c1); - } - } - } - + // glottal foreach (var v1 in vowels) { if (!alias.Contains("cl " + v1) || !alias.Contains("q " + v1)) { From 57079251b7c0136aa4ca88872b46bf7b3cdf2615 Mon Sep 17 00:00:00 2001 From: cadlaxa Date: Mon, 8 Apr 2024 09:34:23 +0800 Subject: [PATCH 17/27] Temporarily removed `[vc c]` because of the conflicts --- OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs index f4e74d504..2992a335a 100644 --- a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs @@ -428,10 +428,12 @@ protected override List ProcessSyllable(Syllable syllable) { phonemes.Add(vcc); firstC = 1; break; - } else if (HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) { + /// temporarily removed vc_c cuz of the arpabet [v] sustain confict on jp vc 😭 + /*} else if (HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) { phonemes.Add(vc_c); firstC = 1; break; + */ } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone) ) { phonemes.Add(vc); break; @@ -661,10 +663,7 @@ protected override List ProcessEnding(Ending ending) { if (!HasOto(cc1, ending.tone)) { cc1 = ValidateAlias(cc1); } - if (TryAddPhoneme(phonemes, ending.tone, $"{cc[i]} {cc[i + 1]}{cc[i + 2]}-", ValidateAlias($"{cc[i]} {cc[i + 1]}{cc[i + 2]}-"))) { - // like [C1 C2-][C3 ...] - i++; - } else if (HasOto(cc1, ending.tone) && (HasOto(cc2, ending.tone) || HasOto($"{cc[i + 1]} {cc[i + 2]}-", ending.tone) || HasOto(ValidateAlias($"{cc[i + 1]} {cc[i + 2]}-"), ending.tone))) { + if (HasOto(cc1, ending.tone) && (HasOto(cc2, ending.tone) || HasOto($"{cc[i + 1]} {cc[i + 2]}-", ending.tone) || HasOto(ValidateAlias($"{cc[i + 1]} {cc[i + 2]}-"), ending.tone))) { // like [C1 C2][C2 ...] phonemes.Add(cc1); } else if ((HasOto(cc[i], ending.tone) || HasOto(ValidateAlias(cc[i]), ending.tone) && (HasOto(cc2, ending.tone) || HasOto($"{cc[i + 1]} {cc[i + 2]}-", ending.tone) || HasOto(ValidateAlias($"{cc[i + 1]} {cc[i + 2]}-"), ending.tone)))) { From bc4c83d58e5f752806abda060fecf02923f40c54 Mon Sep 17 00:00:00 2001 From: Lotte V Date: Tue, 9 Apr 2024 20:06:06 +0200 Subject: [PATCH 18/27] Re-implement Teto fallback --- OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs index 87b670b3c..b04c13b66 100644 --- a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs @@ -214,7 +214,7 @@ protected override List ProcessSyllable(Syllable syllable) { isVocaSampa = true; } - if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone)) { + if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone) || (!HasOto($"- bV", syllable.vowelTone) && !HasOto($"bV", syllable.vowelTone))) { isSimpleDelta = true; } From 5eef05c60a03aa1eb0b1c279b4f9cdb2d477baac Mon Sep 17 00:00:00 2001 From: Lotte V Date: Tue, 9 Apr 2024 23:10:43 +0200 Subject: [PATCH 19/27] More vowel-related bug fixes --- OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs index b04c13b66..33220e3c0 100644 --- a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs @@ -230,7 +230,7 @@ protected override List ProcessSyllable(Syllable syllable) { isTrueXSampa = true; } - if ((!HasOto($"- 3", syllable.tone) && !HasOto($"3", syllable.tone)) || (!HasOto($"- @`", syllable.tone) && !HasOto($"@`", syllable.tone))) { + if (!HasOto($"- 3", syllable.tone) && !HasOto($"3", syllable.tone) && !HasOto($"- @`", syllable.tone) && !HasOto($"@`", syllable.tone)) { isSalemList = true; } @@ -245,9 +245,10 @@ protected override List ProcessSyllable(Syllable syllable) { basePhoneme = v; } } else if (syllable.IsVV) { - if (!CanMakeAliasExtension(syllable) || !AreTonesFromTheSameSubbank(syllable.tone, syllable.vowelTone)) { - basePhoneme = $"{prevV} {v}"; - if (!HasOto(basePhoneme, syllable.vowelTone) && vvExceptions.ContainsKey(prevV) && prevV != v || Delta5vvExceptions.ContainsKey(prevV) && prevV != v) { + if (!CanMakeAliasExtension(syllable)) { + var vv = $"{prevV} {v}"; + basePhoneme = vv; + if (!HasOto(vv, syllable.vowelTone) && !HasOto(ValidateAlias(vv), syllable.vowelTone) && (vvExceptions.ContainsKey(prevV) && prevV != v || Delta5vvExceptions.ContainsKey(prevV) && prevV != v)) { // VV splits to [V C][CV] or [V][V] var delta5vc = $"{Delta5vvExceptions[prevV]}"; bool CV = false; @@ -263,13 +264,11 @@ protected override List ProcessSyllable(Syllable syllable) { basePhoneme = cv; } } else { - { - // VV to V - if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV} {v}"; - } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { - basePhoneme = v; - } + // VV to V + if (HasOto(vv, syllable.vowelTone) || HasOto(ValidateAlias(vv), syllable.vowelTone)) { + basePhoneme = vv; + } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; } } } else { @@ -357,8 +356,9 @@ protected override List ProcessSyllable(Syllable syllable) { basePhoneme = vccv; lastC = 0; } else { - basePhoneme = cc.Last() + v; - if (!HasOto(cc.Last() + v, syllable.vowelTone) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) { + var cv = cc.Last() + v; + basePhoneme = cv; + if ((!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) { basePhoneme = crv; } // try CCV From 8ece5dfc36dfcfc2bd2754b81ca64588e4a5e524 Mon Sep 17 00:00:00 2001 From: Lotte V Date: Tue, 9 Apr 2024 23:20:28 +0200 Subject: [PATCH 20/27] ValidateAlias stuff --- OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs index 33220e3c0..2d1aa7e3d 100644 --- a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs @@ -252,7 +252,7 @@ protected override List ProcessSyllable(Syllable syllable) { // VV splits to [V C][CV] or [V][V] var delta5vc = $"{Delta5vvExceptions[prevV]}"; bool CV = false; - if (!HasOto(delta5vc, syllable.vowelTone)) { + if ((!HasOto(delta5vc, syllable.vowelTone) && !HasOto(ValidateAlias(delta5vc), syllable.vowelTone))) { delta5vc = $"{prevV} {vvExceptions[prevV]}"; CV = true; } @@ -260,7 +260,7 @@ protected override List ProcessSyllable(Syllable syllable) { // if delta5 vc is not available, turn v to cv var cv = $"{vvExceptions[prevV]}{v}"; basePhoneme = v; - if (CV && HasOto(cv, syllable.vowelTone)) { + if (CV && (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone))) { basePhoneme = cv; } } else { From fc50b88ca6e2872325c0bcfb1f1c30cbb8a49c69 Mon Sep 17 00:00:00 2001 From: cadlaxa Date: Fri, 12 Apr 2024 16:03:14 +0800 Subject: [PATCH 21/27] Fixes to alias Validations and add [v] fallback to [ccv] and [cv] --- .../ArpasingPlusPhonemizer.cs | 95 ++++++++++++++----- 1 file changed, 70 insertions(+), 25 deletions(-) diff --git a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs index 2992a335a..558008d6f 100644 --- a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs @@ -70,8 +70,8 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer { .Where(parts => parts[0] != parts[1]) .ToDictionary(parts => parts[0], parts => parts[1]); private bool isTimitPhonemes = false; - private bool vc_FallBack = false; + private bool cPV_FallBack = false; private readonly Dictionary vcFallBacks = new Dictionary() { @@ -84,8 +84,8 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer { {"aa","ah"}, {"ae","ah"}, {"ao","ah"}, - {"eh","ah"}, - {"er","ah"}, + //{"eh","ah"}, + //{"er","ah"}, }; private readonly Dictionary vvExceptions = @@ -266,8 +266,10 @@ protected override List ProcessSyllable(Syllable syllable) { } // For VC Fallback phonemes - if (!HasOto($"{vcFallBacks.Keys} {cc}", syllable.tone)) { - vc_FallBack = true; + foreach (var entry in vcFallBacks) { + if (!HasOto($"{entry.Key} {cc}", syllable.tone) || (!HasOto($"ao {cc}", syllable.tone))) { + vc_FallBack = true; + } } // STARTING V @@ -284,17 +286,17 @@ protected override List ProcessSyllable(Syllable syllable) { else if (syllable.IsVV) { if (!CanMakeAliasExtension(syllable)) { basePhoneme = $"{prevV} {v}"; - if (!HasOto(basePhoneme, syllable.vowelTone) && vvExceptions.ContainsKey(prevV) && prevV != v) { + if (!HasOto(basePhoneme, syllable.vowelTone) && !HasOto(ValidateAlias(basePhoneme), syllable.vowelTone) && vvExceptions.ContainsKey(prevV) && prevV != v) { // VV IS NOT PRESENT, CHECKS VVEXCEPTIONS LOGIC var vc = $"{prevV}{vvExceptions[prevV]}"; - if (!HasOto(vc, syllable.vowelTone)) { + if (!HasOto(vc, syllable.vowelTone) && !HasOto(ValidateAlias(vc), syllable.vowelTone)) { vc = $"{prevV} {vvExceptions[prevV]}"; } phonemes.Add(vc); var crv = $"{vvExceptions[prevV]} {v}"; var cv = $"{vvExceptions[prevV]}{v}"; basePhoneme = cv; - if (!HasOto(cv, syllable.vowelTone)) { + if (!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) { basePhoneme = crv; } } else { @@ -311,9 +313,9 @@ protected override List ProcessSyllable(Syllable syllable) { } } // EXTEND AS [V] - } else if (HasOto($"{v}", syllable.vowelTone) || missingVphonemes.ContainsKey(prevV)) { + } else if (HasOto($"{v}", syllable.vowelTone) && HasOto(ValidateAlias($"{v}"), syllable.vowelTone) || missingVphonemes.ContainsKey(prevV)) { basePhoneme = v; - } else if (!HasOto(v, syllable.vowelTone) && vvDiphthongExceptions.ContainsKey(prevV)) { + } else if (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone) && vvDiphthongExceptions.ContainsKey(prevV)) { basePhoneme = $"{vvDiphthongExceptions[prevV]} {vvDiphthongExceptions[prevV]}"; } else { // PREVIOUS ALIAS WILL EXTEND as [V V] @@ -326,16 +328,27 @@ protected override List ProcessSyllable(Syllable syllable) { var rcv1 = $"- {cc[0]}{v}"; var crv = $"{cc[0]} {v}"; var cv = $"{cc[0]}{v}"; - if (HasOto(rcv, syllable.vowelTone) || HasOto(ValidateAlias(rcv), syllable.vowelTone)) { + /// - CV + if ((HasOto(rcv, syllable.vowelTone) || HasOto(ValidateAlias(rcv), syllable.vowelTone))) { basePhoneme = rcv; - } else if (!HasOto(rcv, syllable.vowelTone) && HasOto(rcv1, syllable.vowelTone)) { + } else if ((HasOto(rcv1, syllable.vowelTone) || HasOto(ValidateAlias(rcv1), syllable.vowelTone)) && (!HasOto(rcv, syllable.vowelTone) && !HasOto(ValidateAlias(rcv), syllable.vowelTone))) { basePhoneme = rcv1; - } else if (!HasOto(rcv, syllable.vowelTone) && HasOto(crv, syllable.vowelTone) && HasOto(ValidateAlias(crv), syllable.vowelTone) && !HasOto(rcv1, syllable.vowelTone)) { + /// CV + } else if ((HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) { basePhoneme = crv; TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", $"-{cc[0]}", ValidateAlias($"- {cc[0]}"), ValidateAlias($"-{cc[0]}")); - } else if (!HasOto(rcv, syllable.vowelTone) && !HasOto(rcv1, syllable.vowelTone) && HasOto(cv, syllable.vowelTone) && HasOto(ValidateAlias(cv), syllable.vowelTone)) { + } else if ((HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone))) { basePhoneme = cv; TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", $"-{cc[0]}", ValidateAlias($"- {cc[0]}"), ValidateAlias($"-{cc[0]}")); + /// - C+V + } else if ((HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone))) { + basePhoneme = v; + TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", $"-{cc[0]}", ValidateAlias($"- {cc[0]}"), ValidateAlias($"-{cc[0]}")); + TryAddPhoneme(phonemes, syllable.tone, $"{cc[0]} -", $"{cc[0]}-", ValidateAlias($"{cc[0]} -"), ValidateAlias($"{cc[0]}-")); + } else if ((HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone) && (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone)))) { + basePhoneme = $"{prevV} {v}"; + TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", $"-{cc[0]}", ValidateAlias($"- {cc[0]}"), ValidateAlias($"-{cc[0]}")); + TryAddPhoneme(phonemes, syllable.tone, $"{cc[0]} -", $"{cc[0]}-", ValidateAlias($"{cc[0]} -"), ValidateAlias($"{cc[0]}-")); } else { basePhoneme = crv; TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", $"-{cc[0]}", ValidateAlias($"- {cc[0]}"), ValidateAlias($"-{cc[0]}")); @@ -349,6 +362,7 @@ protected override List ProcessSyllable(Syllable syllable) { var rccv3 = $"-{string.Join("", cc)}{v}"; var crv = $"{cc.Last()} {v}"; var ccv = $"{string.Join("", cc)} {v}"; + /// - CCV if (HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone) && !ccvException.Contains(cc[0])) { basePhoneme = rccv; lastC = 0; @@ -362,12 +376,18 @@ protected override List ProcessSyllable(Syllable syllable) { basePhoneme = rccv3; lastC = 0; } else { + /// CCV and CV if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone) && !ccvException.Contains(cc[0])) { basePhoneme = ccv; } else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) { basePhoneme = crv; + /// C+V + } else if ((HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(ccv), syllable.vowelTone)) && (!HasOto(ccv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone))) { + basePhoneme = v; + } else if ((HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone) && (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(ccv, syllable.vowelTone) && !HasOto(ValidateAlias(ccv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone)))) { + basePhoneme = $"{prevV} {v}"; } else { - basePhoneme = $"{cc.Last()}{v}"; + basePhoneme = $"{cc.Last()} {v}"; } // TRY RCC [- CC] for (var i = cc.Length; i > 1; i--) { @@ -385,15 +405,25 @@ protected override List ProcessSyllable(Syllable syllable) { } } else { var crv = $"{cc.Last()} {v}"; + var cv = $"{cc.Last()}{v}"; + /// CV if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) { basePhoneme = crv; + } else if ((HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) && HasOto(ValidateAlias(crv), syllable.vowelTone))) { + basePhoneme = cv; + /// C+V + } else if ((HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone))) { + basePhoneme = v; + } else if ((HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone) && (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone)))) { + basePhoneme = $"{prevV} {v}"; } else { - basePhoneme = $"{cc.Last()}{v}"; + basePhoneme = $"{cc.Last()} {v}"; } // try [CC V] for (var i = firstC; i < cc.Length - 1; i++) { var ccv = $"{string.Join("", cc)} {v}"; var ccv1 = string.Join("", cc.Skip(i)) + " " + v; + /// CCV if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[i] + cc[i + 1])) { if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) { basePhoneme = ccv; @@ -403,6 +433,7 @@ protected override List ProcessSyllable(Syllable syllable) { basePhoneme = ccv1; } break; + /// C-Last V } else if (syllable.CurrentWordCc.Length == 1 && syllable.PreviousWordCc.Length == 1) { basePhoneme = crv; } @@ -413,7 +444,7 @@ protected override List ProcessSyllable(Syllable syllable) { var vc_c = $"{prevV}{string.Join(" ", cc.Take(2))}"; var vcc = $"{prevV} {string.Join("", cc.Take(2))}"; var vc = $"{prevV} {cc[0]}"; - // CCV will trigger VCC + // Boolean Triggers bool CCV = false; if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[1])) { if (HasOto($"{string.Join("", cc)} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{string.Join("", cc)} {v}"), syllable.vowelTone)) { @@ -429,12 +460,15 @@ protected override List ProcessSyllable(Syllable syllable) { firstC = 1; break; /// temporarily removed vc_c cuz of the arpabet [v] sustain confict on jp vc 😭 - /*} else if (HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) { - phonemes.Add(vc_c); - firstC = 1; + /*} else if (HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) { + phonemes.Add(vc_c); + firstC = 1; + break; + */ + } else if (cPV_FallBack && (!HasOto(crv, syllable.vowelTone) && !HasOto(ValidateAlias(crv), syllable.vowelTone))) { + TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); break; - */ - } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone) ) { + } else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) { phonemes.Add(vc); break; } else { @@ -442,6 +476,7 @@ protected override List ProcessSyllable(Syllable syllable) { } } } + for (var i = firstC; i < lastC; i++) { var ccv = $"{string.Join("", cc.Skip(i))} {v}"; var cc1 = $"{string.Join(" ", cc.Skip(i))}"; @@ -489,6 +524,12 @@ protected override List ProcessSyllable(Syllable syllable) { cc1 = $"{cc[i]} {cc[i + 1]}"; } } + // C+V + if ((HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(lcv, syllable.vowelTone) && !HasOto(ValidateAlias(lcv), syllable.vowelTone))) { + cPV_FallBack = true; + basePhoneme = v; + cc1 = ValidateAlias(cc1); + } if (i + 1 < lastC) { if (!HasOto(cc1, syllable.tone)) { cc1 = ValidateAlias(cc1); @@ -507,8 +548,7 @@ protected override List ProcessSyllable(Syllable syllable) { phonemes.Add($"{cc[i]} -"); } else if (!HasOto($"- {cc[i + 1]}", syllable.tone) && !HasOto($"{cc[i]} -", syllable.tone)) { // [C1-] [-C2] - cc1 = $"-{cc[i + 1]}"; - phonemes.Add($"{cc[i]}-"); + phonemes.Add($"{cc[0]} -"); } // CC V / CCC V on multiple consonants ex [tr ey s] [spr ih ng] (only if the word starts with a CC or CCC...) if (syllable.CurrentWordCc.Length >= 2) { @@ -534,6 +574,11 @@ protected override List ProcessSyllable(Syllable syllable) { cc1 = $"{cc[i]} {cc[i + 1]}"; } } + // C+V + if ((HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) && (!HasOto(lcv, syllable.vowelTone) && !HasOto(ValidateAlias(lcv), syllable.vowelTone))) { + cPV_FallBack = true; + basePhoneme = v; + } if (HasOto(cc1, syllable.tone) && HasOto(cc1, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) { // like [V C1] [C1 C2] [C2 C3] [C3 ..] phonemes.Add(cc1); @@ -1570,7 +1615,7 @@ protected override string ValidateAlias(string alias) { //VC's foreach (var v1 in vcFallBacks) { foreach (var c1 in consonants) { - if (vcSpecific && vc_FallBack) { + if (vc_FallBack && isMissingVPhonemes) { alias = alias.Replace(v1.Key + " " + c1, v1.Value + " " + c1); } } From f2b50a35ceeb228cb42e42ef4e5d97cfbf2c9b66 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 14 Apr 2024 03:59:02 +0900 Subject: [PATCH 22/27] Fixed forgetting to set icons --- OpenUtau/Views/ExeSetupDialog.axaml | 1 + OpenUtau/Views/SliderDialog.axaml | 1 + OpenUtau/Views/TimeSignatureDialog.axaml | 1 + OpenUtau/Views/TrackSettingsDialog.axaml | 1 + 4 files changed, 4 insertions(+) diff --git a/OpenUtau/Views/ExeSetupDialog.axaml b/OpenUtau/Views/ExeSetupDialog.axaml index 816f11513..39d71ef9d 100644 --- a/OpenUtau/Views/ExeSetupDialog.axaml +++ b/OpenUtau/Views/ExeSetupDialog.axaml @@ -4,6 +4,7 @@ xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="d" Width="400" Height="200" x:Class="OpenUtau.App.Views.ExeSetupDialog" + Icon="/Assets/open-utau.ico" Title="Installing exe"> diff --git a/OpenUtau/Views/SliderDialog.axaml b/OpenUtau/Views/SliderDialog.axaml index b29706333..a2d14d707 100644 --- a/OpenUtau/Views/SliderDialog.axaml +++ b/OpenUtau/Views/SliderDialog.axaml @@ -4,6 +4,7 @@ xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="d" x:Class="OpenUtau.App.Views.SliderDialog" + Icon="/Assets/open-utau.ico" Title="SliderDialog" Height="120" Width="300" WindowStartupLocation="CenterOwner" ExtendClientAreaToDecorationsHint="False"> diff --git a/OpenUtau/Views/TimeSignatureDialog.axaml b/OpenUtau/Views/TimeSignatureDialog.axaml index c1c520ced..d07e3484f 100644 --- a/OpenUtau/Views/TimeSignatureDialog.axaml +++ b/OpenUtau/Views/TimeSignatureDialog.axaml @@ -4,6 +4,7 @@ xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="d" x:Class="OpenUtau.App.Views.TimeSignatureDialog" + Icon="/Assets/open-utau.ico" Title="{DynamicResource dialogs.timesig.caption}" Height="100" Width="320" WindowStartupLocation="CenterOwner" ExtendClientAreaToDecorationsHint="False"> diff --git a/OpenUtau/Views/TrackSettingsDialog.axaml b/OpenUtau/Views/TrackSettingsDialog.axaml index 1e2f895dc..84bb01adc 100644 --- a/OpenUtau/Views/TrackSettingsDialog.axaml +++ b/OpenUtau/Views/TrackSettingsDialog.axaml @@ -4,6 +4,7 @@ xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="d" x:Class="OpenUtau.App.Views.TrackSettingsDialog" + Icon="/Assets/open-utau.ico" Title="{DynamicResource dialogs.tracksettings.caption}" Height="184" Width="320" WindowStartupLocation="CenterOwner" ExtendClientAreaToDecorationsHint="False" CanResize="False"> From 874f84a994431f76b29dbda271c2124a9278eeee Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 15 Apr 2024 23:12:04 +0900 Subject: [PATCH 23/27] Added icon settings to the style window --- OpenUtau/Styles/Styles.axaml | 1 + 1 file changed, 1 insertion(+) diff --git a/OpenUtau/Styles/Styles.axaml b/OpenUtau/Styles/Styles.axaml index 8efb37461..31246c156 100644 --- a/OpenUtau/Styles/Styles.axaml +++ b/OpenUtau/Styles/Styles.axaml @@ -11,6 +11,7 @@