Skip to content

Commit

Permalink
Merge branch 'stakira:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
oxygen-dioxide authored Feb 26, 2024
2 parents e7f805e + 8597a98 commit b5d8a1a
Show file tree
Hide file tree
Showing 43 changed files with 692 additions and 248 deletions.
2 changes: 2 additions & 0 deletions OpenUtau.Core/Api/Phonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ public struct Phoneme {
/// </summary>
public PhonemeAttributes attributes;

public int? index;

public override string ToString() => $"\"{phoneme}\" pos:{position}";
}

Expand Down
2 changes: 1 addition & 1 deletion OpenUtau.Core/Classic/ClassicSinger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public override void Reload() {
void Load() {
if (Avatar != null && File.Exists(Avatar)) {
try {
using (var stream = new FileStream(Avatar, FileMode.Open)) {
using (var stream = new FileStream(Avatar, FileMode.Open, FileAccess.Read)) {
using (var memoryStream = new MemoryStream()) {
stream.CopyTo(memoryStream);
avatarData = memoryStream.ToArray();
Expand Down
83 changes: 83 additions & 0 deletions OpenUtau.Core/Classic/VoicebankPublisher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
using Ignore;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;

using OpenUtau.Core.Ustx;

namespace OpenUtau.Classic {
/// <summary>
/// Packs a singer's voicebank folder into a zip archive for distribution,
/// optionally leaving out files matched by gitignore-style rules.
/// </summary>
public class VoicebankPublisher {
    private readonly Action<double, string> progress;
    private readonly Ignore.Ignore? ignore;

    /// <summary>
    /// Creates a publisher.
    /// </summary>
    /// <param name="progress">Callback receiving a progress value and a status message.</param>
    /// <param name="gitIgnore">
    /// Ignore rules, one per line, or null to pack every file.
    /// Both LF and CRLF line endings are accepted.
    /// </param>
    public VoicebankPublisher(Action<double, string> progress, string? gitIgnore) {
        this.progress = progress;
        if (gitIgnore != null) {
            ignore = new Ignore.Ignore();
            // Strip the '\r' left behind by CRLF line endings; otherwise every rule
            // carries an invisible trailing character and may fail to match.
            var rules = gitIgnore
                .Split('\n')
                .Select(rule => rule.TrimEnd('\r'))
                .ToArray();
            ignore.Add(rules);
        }
    }

    /// <summary>
    /// Loads character.yaml from the singer folder (or starts from an empty config
    /// when the file is missing or loads as null), applies <paramref name="modify"/>,
    /// and writes the result back to character.yaml.
    /// </summary>
    private static void ModifyConfig(USinger singer, Action<VoicebankConfig> modify) {
        var yamlFile = Path.Combine(singer.Location, "character.yaml");
        VoicebankConfig? config = null;
        if (File.Exists(yamlFile)) {
            using (var stream = File.OpenRead(yamlFile)) {
                config = VoicebankConfig.Load(stream);
            }
        }
        config ??= new VoicebankConfig();
        modify(config);
        using (var stream = File.Open(yamlFile, FileMode.Create)) {
            config.Save(stream);
        }
    }

    /// <summary>
    /// Returns true when the ignore rules exclude the given path.
    /// Backslashes are converted to forward slashes before matching.
    /// </summary>
    private bool IsIgnored(string relativePath) {
        return ignore?.IsIgnored(relativePath.Replace('\\', '/')) ?? false;
    }

    /// <summary>
    /// Lists the absolute paths of all files under <paramref name="singerPath"/>
    /// that should go into the archive.
    /// </summary>
    private List<string> GetFilesToPack(string singerPath, string outputFile) {
        string outputFullPath = Path.GetFullPath(outputFile);
        return Directory.EnumerateFiles(singerPath, "*.*", SearchOption.AllDirectories)
            // An archive left in the singer folder by a previous run must not be packed:
            // it is about to be truncated and held open for writing, so reading it back
            // as an entry would fail. Case-insensitive comparison is used because
            // Windows and default macOS file systems ignore case.
            .Where(file => !string.Equals(Path.GetFullPath(file), outputFullPath, StringComparison.OrdinalIgnoreCase))
            .Where(file => !IsIgnored(Path.GetRelativePath(singerPath, file)))
            .ToList();
    }

    ///<summary>
    ///Compress a voicebank into an optimized zip archive for distribution.
    ///This function only supports voicebanks that follow the classic packaging model,
    ///including utau, enunu and diffsinger.
    ///Vogen voicebanks aren't supported.
    ///</summary>
    ///<param name="singer">Singer whose Location folder is packed. Nothing happens if the folder does not exist.</param>
    ///<param name="outputFile">Path of the zip file to create; an existing file is overwritten.</param>
    public void Publish(USinger singer, string outputFile) {
        var location = singer.Location;
        if (!Directory.Exists(location)) {
            return;
        }
        progress.Invoke(0, $"Publishing {singer.Name}");
        //Write singer type into character.yaml
        try {
            // Invariant casing: this value is written to a file, not shown to the user.
            ModifyConfig(singer, config => config.SingerType = singer.SingerType.ToString().ToLowerInvariant());
        } catch (Exception e) {
            // Best effort: a failure to update character.yaml must not abort publishing,
            // but it is reported instead of being silently dropped.
            progress.Invoke(0, $"Failed to update character.yaml: {e.Message}");
        }
        var packList = GetFilesToPack(location, outputFile);
        int fileCount = packList.Count;
        int index = 0;
        using (var archive = new ZipArchive(File.Create(outputFile), ZipArchiveMode.Create)) {
            foreach (var absFilePath in packList) {
                index++;
                progress.Invoke(100.0 * index / fileCount, $"Compressing {absFilePath}");
                // Zip entry names use '/' as separator on every platform; keeping the
                // Windows '\' makes archives extract as flat, oddly named files elsewhere.
                string entryName = Path.GetRelativePath(location, absFilePath)
                    .Replace(Path.DirectorySeparatorChar, '/');
                archive.CreateEntryFromFile(absFilePath, entryName);
            }
        }
        progress.Invoke(0, $"Published {singer.Name} to {outputFile}");
    }
}
}
22 changes: 13 additions & 9 deletions OpenUtau.Core/Commands/ExpCommands.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

using OpenUtau.Core.Ustx;
using OpenUtau.Core.Util;

Expand All @@ -24,15 +23,15 @@ public ExpCommand(UVoicePart part) {
public class SetNoteExpressionCommand : ExpCommand {
public readonly UProject project;
public readonly UTrack track;
public readonly float[] newValue;
public readonly float[] oldValue;
public SetNoteExpressionCommand(UProject project, UTrack track, UVoicePart part, UNote note, string abbr, float[] values) : base(part) {
public readonly float?[] newValue;
public readonly float?[] oldValue;
public SetNoteExpressionCommand(UProject project, UTrack track, UVoicePart part, UNote note, string abbr, float?[] values) : base(part) {
this.project = project;
this.track = track;
this.Note = note;
Key = abbr;
newValue = values;
oldValue = note.GetExpression(project, track, abbr).Select(t => t.Item1).ToArray();
oldValue = note.GetExpressionNoteHas(project, track, abbr);
}
public override string ToString() => $"Set note expression {Key}";
public override void Execute() => Note.SetExpression(project, track, Key, newValue);
Expand All @@ -47,21 +46,26 @@ public class SetPhonemeExpressionCommand : ExpCommand {
public readonly UProject project;
public readonly UTrack track;
public readonly UPhoneme phoneme;
public readonly float newValue;
public readonly float oldValue;
public readonly float? newValue;
public readonly float? oldValue;
public override ValidateOptions ValidateOptions
=> new ValidateOptions {
SkipTiming = true,
Part = Part,
SkipPhonemizer = !needsPhonemizer.Contains(Key),
};
public SetPhonemeExpressionCommand(UProject project, UTrack track, UVoicePart part, UPhoneme phoneme, string abbr, float value) : base(part) {
public SetPhonemeExpressionCommand(UProject project, UTrack track, UVoicePart part, UPhoneme phoneme, string abbr, float? value) : base(part) {
this.project = project;
this.track = track;
this.phoneme = phoneme;
Key = abbr;
newValue = value;
oldValue = phoneme.GetExpression(project, track, abbr).Item1;
var oldExp = phoneme.GetExpression(project, track, abbr);
if (oldExp.Item2) {
oldValue = oldExp.Item1;
} else {
oldValue = null;
}
}
public override string ToString() => $"Set phoneme expression {Key}";
public override void Execute() {
Expand Down
48 changes: 30 additions & 18 deletions OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ string[] GetSymbols(Note note) {
//1. phonetic hint
//2. query from g2p dictionary
//3. treat lyric as phonetic hint, including single phoneme
//4. default pause
//4. empty
if (!string.IsNullOrEmpty(note.phoneticHint)) {
// Split space-separated symbols into an array.
return note.phoneticHint.Split()
Expand All @@ -108,7 +108,7 @@ string[] GetSymbols(Note note) {
if (lyricSplited.Length > 0) {
return lyricSplited;
}
return new string[] { defaultPause };
return new string[] { };
}

string GetSpeakerAtIndex(Note note, int index){
Expand All @@ -122,21 +122,20 @@ string GetSpeakerAtIndex(Note note, int index){
return speaker.Suffix;
}

dsPhoneme[] GetDsPhonemes(Note note){
return GetSymbols(note)
.Select((symbol, index) => new dsPhoneme(symbol, GetSpeakerAtIndex(note, index)))
.ToArray();
}

/// <summary>
/// Returns true when the note's lyric begins with "+~" or "+*".
/// </summary>
protected bool IsSyllableVowelExtensionNote(Note note) {
    // Ordinal comparison: the prefixes are literal markers, so matching must not
    // depend on the current culture's linguistic comparison rules.
    return note.lyric.StartsWith("+~", StringComparison.Ordinal)
        || note.lyric.StartsWith("+*", StringComparison.Ordinal);
}

List<phonemesPerNote> ProcessWord(Note[] notes){
/// <summary>
/// distribute phonemes to each note inside the group
/// </summary>
List<phonemesPerNote> ProcessWord(Note[] notes, string[] symbols){
var wordPhonemes = new List<phonemesPerNote>{
new phonemesPerNote(-1, notes[0].tone)
};
var dsPhonemes = GetDsPhonemes(notes[0]);
var dsPhonemes = symbols
.Select((symbol, index) => new dsPhoneme(symbol, GetSpeakerAtIndex(notes[0], index)))
.ToArray();
var isVowel = dsPhonemes.Select(s => g2p.IsVowel(s.Symbol)).ToArray();
var isGlide = dsPhonemes.Select(s => g2p.IsGlide(s.Symbol)).ToArray();
var nonExtensionNotes = notes.Where(n=>!IsSyllableVowelExtensionNote(n)).ToArray();
Expand Down Expand Up @@ -217,8 +216,17 @@ protected override void ProcessPart(Note[][] phrase) {
new phonemesPerNote(-1,phrase[0][0].tone, new List<dsPhoneme>{new dsPhoneme("SP", GetSpeakerAtIndex(phrase[0][0], 0))})
};
var notePhIndex = new List<int> { 1 };
foreach (var word in phrase) {
var wordPhonemes = ProcessWord(word);
var wordFound = new bool[phrase.Length];
foreach (int wordIndex in Enumerable.Range(0, phrase.Length)) {
Note[] word = phrase[wordIndex];
var symbols = GetSymbols(word[0]);
if (symbols == null || symbols.Length == 0) {
symbols = new string[] { defaultPause };
wordFound[wordIndex] = false;
} else {
wordFound[wordIndex] = true;
}
var wordPhonemes = ProcessWord(word, symbols);
phrasePhonemes[^1].Phonemes.AddRange(wordPhonemes[0].Phonemes);
phrasePhonemes.AddRange(wordPhonemes.Skip(1));
notePhIndex.Add(notePhIndex[^1]+wordPhonemes.SelectMany(n=>n.Phonemes).Count());
Expand Down Expand Up @@ -310,20 +318,24 @@ protected override void ProcessPart(Note[][] phrase) {

//Convert the position sequence to tick and fill into the result list
int index = 1;
foreach (int groupIndex in Enumerable.Range(0, phrase.Length)) {
Note[] group = phrase[groupIndex];
foreach (int wordIndex in Enumerable.Range(0, phrase.Length)) {
Note[] word = phrase[wordIndex];
var noteResult = new List<Tuple<string, int>>();
if (group[0].lyric.StartsWith("+")) {
if (!wordFound[wordIndex]){
//partResult[word[0].position] = noteResult;
continue;
}
if (word[0].lyric.StartsWith("+")) {
continue;
}
double notePos = timeAxis.TickPosToMsPos(group[0].position);//start position of the note, ms
for (int phIndex = notePhIndex[groupIndex]; phIndex < notePhIndex[groupIndex + 1]; ++phIndex) {
double notePos = timeAxis.TickPosToMsPos(word[0].position);//start position of the note, ms
for (int phIndex = notePhIndex[wordIndex]; phIndex < notePhIndex[wordIndex + 1]; ++phIndex) {
if (!String.IsNullOrEmpty(phs[phIndex].Symbol)) {
noteResult.Add(Tuple.Create(phs[phIndex].Symbol, timeAxis.TicksBetweenMsPos(
notePos, positions[phIndex - 1])));
}
}
partResult[group[0].position] = noteResult;
partResult[word[0].position] = noteResult;
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion OpenUtau.Core/DiffSinger/DiffSingerConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ public class DsConfig {
public bool useKeyShiftEmbed = false;
public bool useSpeedEmbed = false;
public bool useEnergyEmbed = false;
public bool useBreathinessEmbed= false;
public bool useBreathinessEmbed = false;
public bool useVoicingEmbed = false;
public bool useTensionEmbed = false;
public AugmentationArgs augmentationArgs;
public bool useShallowDiffusion = false;
public int maxDepth = -1;
Expand All @@ -34,6 +36,10 @@ public class DsConfig {
public int hop_size = 512;
public int sample_rate = 44100;
public bool predict_dur = true;
public bool predict_energy = true;
public bool predict_breathiness = true;
public bool predict_voicing = false;
public bool predict_tension = false;
public bool use_expr = false;
public bool use_note_rest = false;
public float frameMs(){
Expand Down
31 changes: 28 additions & 3 deletions OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public class DiffSingerRenderer : IRenderer {
Format.Ustx.GENC,
Format.Ustx.CLR,
Format.Ustx.BREC,
Format.Ustx.VOIC,
Format.Ustx.TENC,
VELC,
ENE,
PEXP,
Expand Down Expand Up @@ -228,9 +230,12 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
acousticInputs.Add(NamedOnnxValue.CreateFromTensor("velocity", velocityTensor));
}

//Variance: Energy and Breathiness

if(singer.dsConfig.useBreathinessEmbed || singer.dsConfig.useEnergyEmbed){
//Variance: Energy, Breathiness, Voicing and Tension
if(
singer.dsConfig.useBreathinessEmbed
|| singer.dsConfig.useEnergyEmbed
|| singer.dsConfig.useVoicingEmbed
|| singer.dsConfig.useTensionEmbed) {
var variancePredictor = singer.getVariancePredictor();
VarianceResult varianceResult;
lock(variancePredictor){
Expand Down Expand Up @@ -266,6 +271,26 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, Cancellati
new DenseTensor<float>(breathiness, new int[] { breathiness.Length })
.Reshape(new int[] { 1, breathiness.Length })));
}
if(singer.dsConfig.useVoicingEmbed){
var userVoicing = DiffSingerUtils.SampleCurve(phrase, phrase.voicing,
0, frameMs, totalFrames, headFrames, tailFrames,
x => x);
var predictedVoicing = DiffSingerUtils.ResampleCurve(varianceResult.voicing, totalFrames);
var voicing = predictedVoicing.Zip(userVoicing, (x,y)=>(float)Math.Min(x + (y-100)*12/100, 0)).ToArray();
acousticInputs.Add(NamedOnnxValue.CreateFromTensor("voicing",
new DenseTensor<float>(voicing, new int[] { voicing.Length })
.Reshape(new int[] { 1, voicing.Length })));
}
if(singer.dsConfig.useTensionEmbed){
var userTension = DiffSingerUtils.SampleCurve(phrase, phrase.tension,
0, frameMs, totalFrames, headFrames, tailFrames,
x => x);
var predictedTension = DiffSingerUtils.ResampleCurve(varianceResult.tension, totalFrames);
var tension = predictedTension.Zip(userTension, (x,y)=>(float)(x + y * 5 / 100)).ToArray();
acousticInputs.Add(NamedOnnxValue.CreateFromTensor("tension",
new DenseTensor<float>(tension, new int[] { tension.Length })
.Reshape(new int[] { 1, tension.Length })));
}
}
Tensor<float> mel;
lock(acousticModel){
Expand Down
2 changes: 1 addition & 1 deletion OpenUtau.Core/DiffSinger/DiffSingerSinger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public DiffSingerSinger(Voicebank voicebank) {
//Load Avatar
if (Avatar != null && File.Exists(Avatar)) {
try {
using (var stream = new FileStream(Avatar, FileMode.Open)) {
using (var stream = new FileStream(Avatar, FileMode.Open, FileAccess.Read)) {
using (var memoryStream = new MemoryStream()) {
stream.CopyTo(memoryStream);
avatarData = memoryStream.ToArray();
Expand Down
Loading

0 comments on commit b5d8a1a

Please sign in to comment.