Skip to content

Commit

Permalink
Merge branch 'stakira:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
oxygen-dioxide authored Feb 9, 2024
2 parents fcdb7e1 + d4b3cc2 commit bf8575f
Show file tree
Hide file tree
Showing 24 changed files with 460 additions and 347 deletions.
52 changes: 34 additions & 18 deletions OpenUtau.Core/Classic/VoicebankErrorChecker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,15 @@ public void Check() {
message = $"There are duplicate aliases.{message}"
});
}
//Cross platform check
//Windows path is case insensitive, while MacOS path and Linux path are case sensitive.
//On Windows, check if the wave filename in oto.ini is the same as the filename in the file system.
if(OS.IsWindows()){
foreach(var otoSet in voicebank.OtoSets) {
WindowsCaseCheck(otoSet);
}
WindowsCaseCheck(voicebank.BasePath, new string[]{
"chatacter.txt",
"character.yaml",
"prefix.map",
});
foreach(var otoSet in voicebank.OtoSets) {
CheckCaseMatchForFileReference(otoSet);
CheckDuplicatedNameIgnoringCase(otoSet);
}
//TODO: On MacOS and Linux, check if there are files that have the same name but different case.
CheckCaseMatchForFileReference(voicebank.BasePath, new string[]{
"chatacter.txt",
"character.yaml",
"prefix.map",
});
}

bool TryGetFileDuration(string filePath, Oto oto, out double fileDuration) {
Expand Down Expand Up @@ -260,16 +255,16 @@ bool FindDuplication(out List<Oto> duplicates) {
/// </summary>
/// <param name="otoSet">otoSet to be checked</param>
/// <returns></returns>
bool WindowsCaseCheck(OtoSet otoSet) {
return WindowsCaseCheck(
bool CheckCaseMatchForFileReference(OtoSet otoSet) {
return CheckCaseMatchForFileReference(
Directory.GetParent(otoSet.File).FullName,
otoSet.Otos
.Select(oto => oto.Wav)
.Append(otoSet.File)//oto.ini itself
.ToHashSet());
}

bool WindowsCaseCheck(string folder, IEnumerable<string> correctFileNames){
bool CheckCaseMatchForFileReference(string folder, IEnumerable<string> correctFileNames){
bool valid = true;
Dictionary<string, string> fileNamesLowerToActual = Directory.GetFiles(folder)
.Select(Path.GetFileName)
Expand All @@ -280,15 +275,36 @@ bool WindowsCaseCheck(string folder, IEnumerable<string> correctFileNames){
}
if (fileNamesLowerToActual[fileName.ToLower()] != fileName) {
valid = false;
Infos.Add(new VoicebankError() {
Errors.Add(new VoicebankError() {
message = $"Wrong case in file name: \n"
+ $"expected: {Path.Join(folder,fileName)}\n"
+ $"Actual: {Path.Join(folder,fileNamesLowerToActual[fileName.ToLower()])}\n"
+ $"voicebank may not work on another OS."
+ $"The voicebank may not work on another OS."
});
}
}
return valid;
}

/// <summary>
/// Check whether the wav file names referenced by an oto set collide when letter case is ignored.
/// One entry is added to Errors for every group of distinct names that differ only by case.
/// </summary>
/// <param name="otoSet">otoSet to be checked</param>
/// <returns>true if no such group was found, false otherwise</returns>
bool CheckDuplicatedNameIgnoringCase(OtoSet otoSet) {
    // Group with an ordinal, culture-independent comparer. A culture-sensitive ToLower()
    // gives different results depending on the current culture (e.g. Turkish 'I' -> 'ı'),
    // which would let some case-only duplicates go unreported.
    var duplicatedGroups = otoSet.Otos
        .Select(oto => oto.Wav)
        .Distinct()
        .GroupBy(wav => wav, System.StringComparer.OrdinalIgnoreCase)
        .Where(group => group.Count() > 1)
        .ToList();
    foreach (var group in duplicatedGroups) {
        Errors.Add(new VoicebankError() {
            message = $"Duplicated file names found when ignoring case in oto set \"{otoSet.Name}\": "
                + string.Join(", ", group.Select(x => $"\"{x}\""))
                + ".\n"
                + "The voicebank may not work on another OS with case-sensitivity."
        });
    }
    return duplicatedGroups.Count == 0;
}
}
}
2 changes: 1 addition & 1 deletion OpenUtau.Core/DiffSinger/DiffSingerPitch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public RenderPitchResult Process(RenderPhrase phrase){
new DenseTensor<Int64>(word_dur, new int[] { word_dur.Length }, false)
.Reshape(new int[] { 1, word_dur.Length })));
}else{
//if predict_dur is true, use phoneme encode mode
//if predict_dur is false, use phoneme encode mode
linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("ph_dur",
new DenseTensor<Int64>(ph_dur.Select(x=>(Int64)x).ToArray(), new int[] { ph_dur.Length }, false)
.Reshape(new int[] { 1, ph_dur.Length })));
Expand Down
24 changes: 20 additions & 4 deletions OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,14 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
}

var vocoder = singer.getVocoder();
//Vocoder and singer should have the same hop sizes and sample rates.
if(vocoder.hop_size != singer.dsConfig.hop_size){
throw new Exception($"Vocoder's hop size is {vocoder.hop_size}, but acoustic's hop size is {singer.dsConfig.hop_size}.");
}
if(vocoder.sample_rate != singer.dsConfig.sample_rate){
throw new Exception($"Vocoder's sample rate is {vocoder.sample_rate}, but acoustic's sample rate is {singer.dsConfig.sample_rate}.");
}

var acousticModel = singer.getAcousticSession();
var frameMs = vocoder.frameMs();
var frameSec = frameMs / 1000;
Expand Down Expand Up @@ -242,7 +250,8 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
} else{
userEnergy = Enumerable.Repeat(0d, totalFrames);
}
var energy = varianceResult.energy.Zip(userEnergy, (x,y)=>(float)Math.Min(x + y*12/100, 0)).ToArray();
var predictedEnergy = DiffSingerUtils.ResampleCurve(varianceResult.energy, totalFrames);
var energy = predictedEnergy.Zip(userEnergy, (x,y)=>(float)Math.Min(x + y*12/100, 0)).ToArray();
acousticInputs.Add(NamedOnnxValue.CreateFromTensor("energy",
new DenseTensor<float>(energy, new int[] { energy.Length })
.Reshape(new int[] { 1, energy.Length })));
Expand All @@ -251,7 +260,8 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
var userBreathiness = DiffSingerUtils.SampleCurve(phrase, phrase.breathiness,
0, frameMs, totalFrames, headFrames, tailFrames,
x => x);
var breathiness = varianceResult.breathiness.Zip(userBreathiness, (x,y)=>(float)Math.Min(x + y*12/100, 0)).ToArray();
var predictedBreathiness = DiffSingerUtils.ResampleCurve(varianceResult.breathiness, totalFrames);
var breathiness = predictedBreathiness.Zip(userBreathiness, (x,y)=>(float)Math.Min(x + y*12/100, 0)).ToArray();
acousticInputs.Add(NamedOnnxValue.CreateFromTensor("breathiness",
new DenseTensor<float>(breathiness, new int[] { breathiness.Length })
.Reshape(new int[] { 1, breathiness.Length })));
Expand All @@ -271,14 +281,20 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
var vocoderInputs = new List<NamedOnnxValue>();
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("mel", mel));
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor));
float[] samples;
Tensor<float> samplesTensor;
lock(vocoder){
if(cancellation.IsCancellationRequested) {
return null;
}
var vocoderOutputs = vocoder.session.Run(vocoderInputs);
samples = vocoderOutputs.First().AsTensor<float>().ToArray();
samplesTensor = vocoderOutputs.First().AsTensor<float>();
}
//Check the size of samplesTensor
int[] expectedShape = new int[] { 1, -1 };
if(!DiffSingerUtils.ValidateShape(samplesTensor, expectedShape)){
throw new Exception($"The shape of vocoder output should be (1, length), but the actual shape is {DiffSingerUtils.ShapeString(samplesTensor)}");
}
var samples = samplesTensor.ToArray();
return samples;
}

Expand Down
64 changes: 64 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerUtils.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenUtau.Core.Render;

namespace OpenUtau.Core.DiffSinger {
Expand Down Expand Up @@ -31,5 +32,68 @@ public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double de
Array.Fill(result, convert(curve[^1]), length - tailFrames, tailFrames);
return result;
}


/// <summary>
/// Linear interpolation between (x0, y0) and (x1, y1), evaluated at x.
/// Same as MusicMath.Linear, but float numbers are used instead of double.
/// </summary>
/// <returns>
/// y1 when x1 - x0 is smaller than 0.001 (this also covers x1 &lt; x0),
/// otherwise the value of the line through both points at x.
/// </returns>
public static float LinearF(float x0, float x1, float y0, float y1, float x) {
    const float minSpan = 0.001f;
    float span = x1 - x0;
    // A (nearly) zero or negative span cannot be divided by safely; fall back to the right-hand value.
    return span < minSpan
        ? y1
        : y0 + (y1 - y0) * (x - x0) / span;
}

/// <summary>
/// Resample a curve to a new length using linear interpolation.
/// The first and last output points map onto the first and last input points.
/// Used when the hopsize of the variance model is different from the hopsize of the acoustic model.
/// </summary>
/// <param name="curve">The curve to resample.</param>
/// <param name="length">The new length of the curve.</param>
/// <returns>
/// null if <paramref name="curve"/> is null or empty; the same array instance if it already has
/// the requested length; otherwise a new array with <paramref name="length"/> elements.
/// </returns>
public static float[] ResampleCurve(float[] curve, int length) {
    if (curve == null || curve.Length == 0) {
        return null;
    }
    if (length == curve.Length) {
        return curve;
    }
    if (length == 1) {
        // A single output point cannot span the curve; take its first value.
        return new float[] { curve[0] };
    }
    int lastIndex = curve.Length - 1;
    var resampled = new float[length];
    for (int i = 0; i < resampled.Length; i++) {
        // Position of output point i expressed in input index space.
        float pos = (float)i / (length - 1) * lastIndex;
        int left = (int)pos;
        // Clamp so that the very last point does not read past the end of the curve.
        int right = Math.Min(left + 1, lastIndex);
        resampled[i] = LinearF(left, right, curve[left], curve[right], pos);
    }
    return resampled;
}

/// <summary>
/// Validate the shape of a tensor against an expected shape.
/// </summary>
/// <typeparam name="T">Element type of the tensor</typeparam>
/// <param name="tensor">Tensor to be validated</param>
/// <param name="expectedShape">Expected shape of the tensor, -1 means the length of the axis is dynamic</param>
/// <returns>
/// true if the tensor has the same number of axes as <paramref name="expectedShape"/> and every
/// axis whose expected length is not -1 has exactly that length; false otherwise
/// </returns>
public static bool ValidateShape<T>(Tensor<T> tensor, int[] expectedShape){
    var actual = tensor.Dimensions;
    if (actual.Length != expectedShape.Length) {
        return false;
    }
    for (int axis = 0; axis < expectedShape.Length; axis++) {
        int expected = expectedShape[axis];
        bool isDynamic = expected == -1;
        if (!isDynamic && actual[axis] != expected) {
            return false;
        }
    }
    return true;
}

/// <summary>
/// Format the dimensions of a tensor as a parenthesized, comma separated list, e.g. "(1, 128)".
/// </summary>
/// <typeparam name="T">Element type of the tensor</typeparam>
/// <param name="tensor">Tensor whose shape is formatted</param>
/// <returns>The shape as text</returns>
public static string ShapeString<T>(Tensor<T> tensor){
    int[] dims = tensor.Dimensions.ToArray();
    string joined = string.Join(", ", dims);
    return $"({joined})";
}
}
}
2 changes: 1 addition & 1 deletion OpenUtau.Core/DiffSinger/DiffSingerVariance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public VarianceResult Process(RenderPhrase phrase){
new DenseTensor<Int64>(word_dur, new int[] { word_dur.Length }, false)
.Reshape(new int[] { 1, word_dur.Length })));
}else{
//if predict_dur is true, use phoneme encode mode
//if predict_dur is false, use phoneme encode mode
linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("ph_dur",
new DenseTensor<Int64>(ph_dur.Select(x=>(Int64)x).ToArray(), new int[] { ph_dur.Length }, false)
.Reshape(new int[] { 1, ph_dur.Length })));
Expand Down
4 changes: 4 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ public class DsVocoder : IDisposable {
public DsVocoderConfig config;
public InferenceSession session;

public int num_mel_bins => config.num_mel_bins;
public int hop_size => config.hop_size;
public int sample_rate => config.sample_rate;

//Get vocoder by package name
public DsVocoder(string name) {
byte[] model;
Expand Down
20 changes: 20 additions & 0 deletions OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerGermanPhonemizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using OpenUtau.Api;
using OpenUtau.Core.G2p;

namespace OpenUtau.Core.DiffSinger
{
/// <summary>
/// DiffSinger phonemizer registered for German ("DE").
/// It supplies the dictionary file name, the base G2P (<see cref="GermanG2p"/>) and the
/// vowel / consonant symbol lists of that G2P to <see cref="DiffSingerG2pPhonemizer"/>.
/// </summary>
[Phonemizer("DiffSinger German Phonemizer", "DIFFS DE", language: "DE")]
public class DiffSingerGermanPhonemizer : DiffSingerG2pPhonemizer
{
    // Vowel symbols reported for the base G2p.
    private static readonly string[] baseVowels = {
        "aa", "ae", "ah", "ao", "aw", "ax", "ay",
        "ee", "eh", "er", "ex",
        "ih", "iy",
        "oe", "ohh", "ooh", "oy",
        "ue", "uh", "uw",
        "yy",
    };

    // Consonant symbols reported for the base G2p.
    private static readonly string[] baseConsonants = {
        "b", "cc", "ch", "d", "dh", "f", "g", "hh", "jh", "k", "l", "m",
        "n", "ng", "p", "pf", "q", "r", "rr", "s", "sh", "t", "th", "ts",
        "v", "w", "x", "y", "z", "zh",
    };

    /// <summary>File name of the dictionary used by this phonemizer.</summary>
    protected override string GetDictionaryName()
    {
        return "dsdict-de.yaml";
    }

    /// <summary>Creates the German G2p used as the base G2p.</summary>
    protected override IG2p LoadBaseG2p()
    {
        return new GermanG2p();
    }

    /// <summary>Returns a fresh copy of the vowel list on every call.</summary>
    protected override string[] GetBaseG2pVowels()
    {
        return (string[])baseVowels.Clone();
    }

    /// <summary>Returns a fresh copy of the consonant list on every call.</summary>
    protected override string[] GetBaseG2pConsonants()
    {
        return (string[])baseConsonants.Clone();
    }
}
}
45 changes: 45 additions & 0 deletions OpenUtau.Core/Editing/NoteBatchEdits.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,51 @@ public void Run(UProject project, UVoicePart part, List<UNote> selectedNotes, Do
}
}

/// <summary>
/// Batch edit that inserts a note carrying a fixed lyric (given to the constructor)
/// directly in front of notes. It works on the selected notes, or on every note of the
/// part when nothing is selected.
/// </summary>
public class AddBreathNote : BatchEdit {
    private readonly string lyric;
    private readonly string name = "pianoroll.menu.notes.addbreath";

    public string Name => name;

    /// <param name="lyric">Lyric assigned to every inserted note.</param>
    public AddBreathNote(string lyric) {
        this.lyric = lyric;
    }

    /// <summary>
    /// Builds the notes to insert and adds them to the part inside a single undo group.
    /// Nothing is executed when no note qualifies.
    /// </summary>
    public void Run(UProject project, UVoicePart part, List<UNote> selectedNotes, DocManager docManager) {
        var targets = selectedNotes.Count > 0 ? selectedNotes : part.notes.ToList();
        var breaths = new List<UNote>();
        foreach (var target in targets) {
            // Notes that already carry the lyric never get another one in front of them.
            if (target.lyric == lyric) {
                continue;
            }
            // Default length; used when there is no previous note or the gap is wider than 960.
            int duration = 480;
            var prev = target.Prev;
            if (prev != null) {
                // Skip when the previous note already has the lyric or the gap is 120 or less.
                if (prev.lyric == lyric || target.position - 120 <= prev.End) {
                    continue;
                }
                // For gaps of up to 960 the new note fills the whole gap.
                if (prev.End >= target.position - 960) {
                    duration = target.position - prev.End;
                }
            }
            var breath = project.CreateNote(target.tone, target.position - duration, duration);
            // Carry over the expressions stored for index 0 of the following note.
            var firstPhonemeExps = target.phonemeExpressions.Where(e => e.index == 0);
            foreach (var source in firstPhonemeExps) {
                breath.SetExpression(project, project.tracks[part.trackNo], source.abbr, new float[] { source.value });
            }
            breaths.Add(breath);
        }
        if (breaths.Count == 0) {
            return;
        }
        docManager.StartUndoGroup(true);
        foreach (var breath in breaths) {
            breath.lyric = lyric;
            docManager.ExecuteCmd(new AddNoteCommand(part, breath));
        }
        docManager.EndUndoGroup();
    }
}

public class Transpose : BatchEdit {
public string Name => name;

Expand Down
27 changes: 13 additions & 14 deletions OpenUtau.Core/Ustx/UNote.cs
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,23 @@ public UPhonemeOverride GetPhonemeOverride(int index) {
}

public List<Tuple<float, bool>> GetExpression(UProject project, UTrack track, string abbr) {
track.TryGetExpression(project, abbr, out var descriptor);
track.TryGetExpression(project, abbr, out UExpression trackExp);
var list = new List<Tuple<float, bool>>();
int indexes = (phonemeExpressions.Max(exp => exp.index) ?? 0) + 1;

for (int i = 0; i < indexes; i++) {
var expression = phonemeExpressions.FirstOrDefault(exp => exp.descriptor?.abbr == descriptor.abbr && exp.index == i);
if (expression != null) {
list.Add(Tuple.Create(expression.value, true));
var phonemeExp = phonemeExpressions.FirstOrDefault(exp => exp.descriptor?.abbr == abbr && exp.index == i);
if (phonemeExp != null) {
list.Add(Tuple.Create(phonemeExp.value, true));
} else {
list.Add(Tuple.Create(descriptor.defaultValue, false));
list.Add(Tuple.Create(trackExp.value, false));
}
}
return list;
}

public void SetExpression(UProject project, UTrack track, string abbr, float[] values) {
if (!track.TryGetExpression(project, abbr, out var descriptor)) {
if (!track.TryGetExpression(project, abbr, out UExpression trackExp)) {
return;
}
int indexes = (phonemeExpressions.Max(exp => exp.index) ?? 0) + 1;
Expand All @@ -185,17 +185,16 @@ public void SetExpression(UProject project, UTrack track, string abbr, float[] v
value = values.Last();
}

if (descriptor.defaultValue == value) {
phonemeExpressions.RemoveAll(exp => exp.descriptor?.abbr == descriptor.abbr && exp.index == i);
if (trackExp.value == value) {
phonemeExpressions.RemoveAll(exp => exp.descriptor?.abbr == abbr && exp.index == i);
continue;
}
var expression = phonemeExpressions.FirstOrDefault(exp => exp.descriptor?.abbr == descriptor.abbr && exp.index == i);
if (expression != null) {
expression.descriptor = descriptor;
expression.value = value;
var phonemeExp = phonemeExpressions.FirstOrDefault(exp => exp.descriptor?.abbr == abbr && exp.index == i);
if (phonemeExp != null) {
phonemeExp.descriptor = trackExp.descriptor;
phonemeExp.value = value;
} else {
phonemeExpressions.Add(new UExpression(descriptor) {
descriptor = descriptor,
phonemeExpressions.Add(new UExpression(trackExp.descriptor) {
index = i,
value = value,
});
Expand Down
Loading

0 comments on commit bf8575f

Please sign in to comment.