Skip to content

Commit

Permalink
Merge pull request #7 from oxygen-dioxide/PhonemizerOnUtau
Browse files Browse the repository at this point in the history
Phonemizer on utau
  • Loading branch information
oxygen-dioxide authored Jan 28, 2024
2 parents 167e469 + ab04711 commit 7e460f5
Show file tree
Hide file tree
Showing 72 changed files with 5,568 additions and 1,815 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Runs the test project on every pull request that targets master,
# once per operating system listed in the matrix.
on:
  pull_request:
    branches: [ master ]

jobs:
  pr-test:
    runs-on: ${{ matrix.os.runs-on }}

    strategy:
      matrix:
        # Each entry pairs a runner image with the runtime identifier passed to restore.
        os:
          - runs-on: windows-latest
            arch: win-x64
          - runs-on: macos-latest
            arch: osx-x64
          - runs-on: ubuntu-latest
            arch: linux-x64

    steps:
      - uses: actions/checkout@v1

      - name: restore
        run: dotnet restore OpenUtau -r ${{ matrix.os.arch }}

      - name: test
        run: dotnet test OpenUtau.Test
22 changes: 20 additions & 2 deletions OpenUtau.Core/Api/G2pDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ class TrieNode {

TrieNode root;
Dictionary<string, bool> phonemeSymbols; // (phoneme, isVowel)
HashSet<string> glideSymbols;

G2pDictionary(TrieNode root, Dictionary<string, bool> phonemeSymbols) {
G2pDictionary(TrieNode root, Dictionary<string, bool> phonemeSymbols, HashSet<string> glideSymbols) {
this.root = root;
this.phonemeSymbols = phonemeSymbols;
this.glideSymbols = glideSymbols;
}

public bool IsValidSymbol(string symbol) {
Expand All @@ -30,6 +32,10 @@ public bool IsVowel(string symbol) {
return phonemeSymbols.TryGetValue(symbol, out var isVowel) && isVowel;
}

/// <summary>
/// Reports whether the symbol is contained in this dictionary's glide symbol set.
/// </summary>
public bool IsGlide(string symbol) => glideSymbols.Contains(symbol);

/// <summary>
/// Queries the grapheme by delegating to QueryTrie with the root node and a start index of 0.
/// </summary>
public string[] Query(string grapheme) => QueryTrie(root, grapheme, 0);
Expand All @@ -56,23 +62,35 @@ string[] QueryTrie(TrieNode node, string word, int index) {
public class Builder {
TrieNode root;
Dictionary<string, bool> phonemeSymbols; // (phoneme, isVowel)
HashSet<string> glideSymbols;

/// <summary>
/// Creates a builder with an empty trie root and empty symbol tables.
/// </summary>
internal Builder() {
    this.glideSymbols = new HashSet<string>();
    this.phonemeSymbols = new Dictionary<string, bool>();
    this.root = new TrieNode();
}

/// <summary>
/// Add valid symbols of dictionary.
/// </summary>
/// <param name="symbol">Phoneme symbol to register.</param>
/// <param name="type">
/// Phoneme type: "vowel" registers a vowel; "semivowel" or "liquid" registers a glide;
/// any other value registers a plain non-vowel.
/// </param>
/// <returns>This builder, for chaining.</returns>
public Builder AddSymbol(string symbol, string type) {
    return AddSymbol(symbol, type == "vowel", type == "semivowel" || type == "liquid");
}

/// <summary>
/// Add a valid symbol, stating only whether it is a vowel.
/// </summary>
/// <returns>This builder, for chaining.</returns>
public Builder AddSymbol(string symbol, bool isVowel) {
    phonemeSymbols[symbol] = isVowel;
    if (isVowel) {
        // A vowel is never stored as a glide; drop a glide flag left by an earlier registration.
        glideSymbols.Remove(symbol);
    }
    return this;
}

/// <summary>
/// Add a valid symbol with explicit vowel and glide flags.
/// The glide flag is only honored for non-vowels.
/// </summary>
/// <returns>This builder, for chaining.</returns>
public Builder AddSymbol(string symbol, bool isVowel, bool isGlide) {
    phonemeSymbols[symbol] = isVowel;
    if (isGlide && !isVowel) {
        glideSymbols.Add(symbol);
    } else {
        // Re-registration replaces the previous entry, so the glide set must follow the
        // latest call just like the vowel flag does.
        glideSymbols.Remove(symbol);
    }
    return this;
}

/// <summary>
/// Must finish adding symbols before adding entries, otherwise symbols get ignored.
Expand Down Expand Up @@ -123,7 +141,7 @@ public Builder Load(TextReader textReader) {
}

public G2pDictionary Build() {
return new G2pDictionary(root, phonemeSymbols);
return new G2pDictionary(root, phonemeSymbols, glideSymbols);
}
}

Expand Down
9 changes: 9 additions & 0 deletions OpenUtau.Core/Api/G2pFallbacks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ public bool IsVowel(string symbol) {
return false;
}

/// <summary>
/// Asks the first dictionary that recognizes the symbol whether it is a glide.
/// Returns false when no dictionary treats the symbol as valid.
/// </summary>
public bool IsGlide(string symbol) {
    foreach (var g2p in dictionaries) {
        if (!g2p.IsValidSymbol(symbol)) {
            continue;
        }
        return g2p.IsGlide(symbol);
    }
    return false;
}

public string[] Query(string grapheme) {
foreach (var dict in dictionaries) {
var result = dict.Query(grapheme);
Expand Down
4 changes: 4 additions & 0 deletions OpenUtau.Core/Api/G2pPack.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ public bool IsVowel(string symbol) {
return Dict.IsVowel(symbol);
}

/// <summary>
/// Forwards the glide check to the underlying Dict.
/// </summary>
public bool IsGlide(string symbol) => Dict.IsGlide(symbol);

public string[] Query(string grapheme) {
if (grapheme.Length == 0 || kAllPunct.IsMatch(grapheme)) {
return null;
Expand Down
9 changes: 8 additions & 1 deletion OpenUtau.Core/Api/G2pRemapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ namespace OpenUtau.Api {
public class G2pRemapper : IG2p {
private IG2p mapped;
private Dictionary<string, bool> phonemeSymbols; // (phoneme, isVowel)
private HashSet<string> glideSymbols;
private Dictionary<string, string> replacements;

public G2pRemapper(IG2p mapped,
Dictionary<string, bool> phonemeSymbols,
Dictionary<string, string> replacements) {
Dictionary<string, string> replacements,
HashSet<string> glideSymbols = null) {
this.mapped = mapped;
this.phonemeSymbols = phonemeSymbols;
this.replacements = replacements;
this.glideSymbols = glideSymbols ?? new HashSet<string>();
}

public bool IsValidSymbol(string symbol) {
Expand All @@ -23,6 +26,10 @@ public bool IsVowel(string symbol) {
return phonemeSymbols.TryGetValue(symbol, out var isVowel) && isVowel;
}

/// <summary>
/// Reports whether the symbol is contained in the remapper's glide symbol set.
/// </summary>
public bool IsGlide(string symbol) => glideSymbols.Contains(symbol);

public string[] Query(string grapheme) {
var phonemes = mapped.Query(grapheme);
if (phonemes == null) {
Expand Down
5 changes: 5 additions & 0 deletions OpenUtau.Core/Api/IG2p.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ public interface IG2p {
bool IsValidSymbol(string symbol);
bool IsVowel(string symbol);

/// <summary>
/// Returns true if the symbol is a semivowel or liquid phoneme, like y, w, l, r in English.
/// </summary>
bool IsGlide(string symbol);

/// <summary>
/// Produces a list of phonemes from grapheme.
/// </summary>
Expand Down
1 change: 0 additions & 1 deletion OpenUtau.Core/Classic/ClassicRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using NAudio.Wave;
Expand Down
6 changes: 1 addition & 5 deletions OpenUtau.Core/Classic/ClassicSingerLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ static USinger AdjustSingerType(Voicebank v) {
}
public static IEnumerable<USinger> FindAllSingers() {
List<USinger> singers = new List<USinger>();
foreach (var path in new string[] {
PathManager.Inst.SingersPathOld,
PathManager.Inst.SingersPath,
PathManager.Inst.AdditionalSingersPath,
}) {
foreach (var path in PathManager.Inst.SingersPaths) {
var loader = new VoicebankLoader(path);
singers.AddRange(loader.SearchAll()
.Select(AdjustSingerType));
Expand Down
13 changes: 11 additions & 2 deletions OpenUtau.Core/Classic/ExeWavtool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ public float[] Concatenate(List<ResamplerItem> resamplerItems, string tempPath,
if (cancellation.IsCancellationRequested) {
return null;
}
//The builtin worldline resampler can't be called from bat script,
//so we need to call it directly from C#
foreach(var item in resamplerItems){
if(!(item.resampler is ExeResampler) && !cancellation.IsCancellationRequested && !File.Exists(item.outputFile)){
lock (Renderers.GetCacheLock(item.outputFile)) {
item.resampler.DoResamplerReturnsFile(item, Log.Logger);
}
}
}
PrepareHelper();
string batPath = Path.Combine(PathManager.Inst.CachePath, "temp.bat");
lock (tempBatLock) {
Expand Down Expand Up @@ -93,15 +102,15 @@ void WriteSetUp(StreamWriter writer, List<ResamplerItem> resamplerItems, string

void WriteItem(StreamWriter writer, ResamplerItem item, int index, int total) {
writer.WriteLine($"@set resamp={item.resampler.FilePath}");
writer.WriteLine($"@set params={item.volume} {item.modulation} !{item.tempo.ToString("G999")} {Base64.Base64EncodeInt12(item.pitches)}");
writer.WriteLine($"@set params={item.volume} {item.modulation} !{item.tempo:G999} {Base64.Base64EncodeInt12(item.pitches)}");
writer.WriteLine($"@set flag=\"{item.GetFlagsString()}\"");
writer.WriteLine($"@set env={GetEnvelope(item)}");
writer.WriteLine($"@set stp={item.skipOver}");
writer.WriteLine($"@set vel={item.velocity}");
string relOutputFile = Path.GetRelativePath(PathManager.Inst.CachePath, item.outputFile);
writer.WriteLine($"@set temp=\"%cachedir%\\{relOutputFile}\"");
string toneName = MusicMath.GetToneName(item.tone);
string dur = $"{item.phone.duration.ToString("G999")}@{item.phone.adjustedTempo.ToString("G999")}{(item.durCorrection >= 0 ? "+" : "")}{item.durCorrection}";
string dur = $"{item.phone.duration:G999}@{item.phone.adjustedTempo:G999}{(item.durCorrection >= 0 ? "+" : "")}{item.durCorrection}";
string relInputTemp = Path.GetRelativePath(PathManager.Inst.CachePath, item.inputTemp);
writer.WriteLine($"@echo {MakeProgressBar(index + 1, total)}");
writer.WriteLine($"@call %helper% \"%oto%\\{relInputTemp}\" {toneName} {dur} {item.preutter} {item.offset} {item.durRequired} {item.consonant} {item.cutoff} {index}");
Expand Down
42 changes: 41 additions & 1 deletion OpenUtau.Core/Classic/ResamplerItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Numerics;
using System.Text;
using K4os.Hash.xxHash;
using NAudio.Wave;
using OpenUtau.Core;
using OpenUtau.Core.Render;
using OpenUtau.Core.Ustx;
using static NetMQ.NetMQSelector;
using static OpenUtau.Api.Phonemizer;

namespace OpenUtau.Classic {
Expand Down Expand Up @@ -80,7 +82,7 @@ public ResamplerItem(RenderPhrase phrase, RenderPhone phone) {
var pitchIntervalMs = MusicMath.TempoTickToMs(tempo, 5);
var pitchSampleStartMs = phone.positionMs - pitchLeadingMs;

for (int i=0; i<pitches.Length; i++) {
for (int i = 0; i < pitches.Length; i++) {
var samplePosMs = pitchSampleStartMs + pitchIntervalMs * i;
var samplePosTick = (int)Math.Floor(phrase.timeAxis.MsPosToNonExactTickPos(samplePosMs));

Expand Down Expand Up @@ -145,5 +147,43 @@ ulong Hash() {
}
}
}

/// <summary>
/// Builds a copy of the phone's envelope with X converted to sample positions and Y scaled down by 100.
/// </summary>
/// <returns>
/// A new list in which every X is shifted so the first point lands on the skip-over offset
/// (at 44100 samples per 1000 X units) and every Y is divided by 100.
/// The list returned by phone.envelope is not modified.
/// </returns>
public List<Vector2> EnvelopeMsToSamples() {
    // Round rather than truncate so this offset matches the skipSamples value that
    // SharpWavtool computes from the same skipOver with Math.Round; truncating could
    // place the envelope one sample away from the audio it is applied to.
    int skipOverSamples = (int)Math.Round(skipOver * 44100 / 1000);
    var envelope = phone.envelope.ToList();
    // Translate so that the first envelope point becomes the origin before scaling.
    double shift = -envelope[0].X;
    for (int i = 0; i < envelope.Count; ++i) {
        var point = envelope[i];
        point.X = (float)((point.X + shift) * 44100 / 1000) + skipOverSamples;
        point.Y /= 100;
        envelope[i] = point;
    }
    return envelope;
}

/// <summary>
/// Multiplies every sample in place by a gain read from the envelope returned by
/// EnvelopeMsToSamples: the first point's Y before the envelope, the last point's Y
/// after it, and a linear blend between the two neighbouring points otherwise.
/// </summary>
/// <param name="samples">Sample buffer to scale in place.</param>
public void ApplyEnvelope(float[] samples) {
    var points = EnvelopeMsToSamples();
    int cursor = 0;
    for (int n = 0; n < samples.Length; ++n) {
        // Move the cursor to the first point whose X is not behind the current sample.
        while (cursor < points.Count && n > points[cursor].X) {
            cursor++;
        }

        float gain;
        if (cursor == 0) {
            gain = points[0].Y;
        } else if (cursor >= points.Count) {
            gain = points[points.Count - 1].Y;
        } else {
            var left = points[cursor - 1];
            var right = points[cursor];
            // Points that coincide or run backwards cannot be interpolated; hold the left gain.
            gain = left.X >= right.X
                ? left.Y
                : left.Y + (right.Y - left.Y) * (n - left.X) / (right.X - left.X);
        }
        samples[n] *= gain;
    }
}
}
}
67 changes: 15 additions & 52 deletions OpenUtau.Core/Classic/SharpWavtool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,23 @@ public float[] Concatenate(List<ResamplerItem> resamplerItems, string tempPath,
segment.posMs = item.phone.positionMs - item.phone.leadingMs - (phrase.positionMs - phrase.leadingMs);
segment.posSamples = (int)Math.Round(segment.posMs * 44100 / 1000);
segment.skipSamples = (int)Math.Round(item.skipOver * 44100 / 1000);
segment.envelope = EnvelopeMsToSamples(item.phone.envelope, segment.skipSamples);

if (!phaseComp) {
continue;
segment.envelope = item.EnvelopeMsToSamples();

if (phaseComp) {
var headWindow = GetHeadWindow(segment.samples, segment.envelope, out segment.headWindowStart);
segment.headWindowF0 = GetF0AtSample(phrase,
segment.posSamples - segment.skipSamples + segment.headWindowStart + headWindow.Length / 2);
segment.headPhase = CalcPhase(headWindow,
segment.posSamples - segment.skipSamples + segment.headWindowStart, 44100, segment.headWindowF0);

var tailWindow = GetTailWindow(segment.samples, segment.envelope, out segment.tailWindowStart);
segment.tailWindowF0 = GetF0AtSample(phrase,
segment.posSamples - segment.skipSamples + segment.tailWindowStart + tailWindow.Length / 2);
segment.tailPhase = CalcPhase(tailWindow,
segment.posSamples - segment.skipSamples + segment.tailWindowStart, 44100, segment.tailWindowF0);
}

var headWindow = GetHeadWindow(segment.samples, segment.envelope, out segment.headWindowStart);
segment.headWindowF0 = GetF0AtSample(phrase,
segment.posSamples - segment.skipSamples + segment.headWindowStart + headWindow.Length / 2);
segment.headPhase = CalcPhase(headWindow,
segment.posSamples - segment.skipSamples + segment.headWindowStart, 44100, segment.headWindowF0);

var tailWindow = GetTailWindow(segment.samples, segment.envelope, out segment.tailWindowStart);
segment.tailWindowF0 = GetF0AtSample(phrase,
segment.posSamples - segment.skipSamples + segment.tailWindowStart + tailWindow.Length / 2);
segment.tailPhase = CalcPhase(tailWindow,
segment.posSamples - segment.skipSamples + segment.tailWindowStart, 44100, segment.tailWindowF0);
item.ApplyEnvelope(segment.samples);
}

if (phaseComp) {
Expand Down Expand Up @@ -100,50 +100,13 @@ public float[] Concatenate(List<ResamplerItem> resamplerItems, string tempPath,
var phraseSamples = new float[0];
foreach (var segment in segments) {
Array.Resize(ref phraseSamples, segment.posSamples + segment.correction + segment.samples.Length - segment.skipSamples);
ApplyEnvelope(segment.samples, segment.envelope);
for (int i = Math.Max(0, -segment.skipSamples); i < segment.samples.Length - segment.skipSamples; i++) {
phraseSamples[segment.posSamples + segment.correction + i] += segment.samples[segment.skipSamples + i];
}
}
return phraseSamples;
}

private static void ApplyEnvelope(float[] data, IList<Vector2> envelope) {
int nextPoint = 0;
for (int i = 0; i < data.Length; ++i) {
while (nextPoint < envelope.Count && i > envelope[nextPoint].X) {
nextPoint++;
}
float gain;
if (nextPoint == 0) {
gain = envelope.First().Y;
} else if (nextPoint >= envelope.Count) {
gain = envelope.Last().Y;
} else {
var p0 = envelope[nextPoint - 1];
var p1 = envelope[nextPoint];
if (p0.X >= p1.X) {
gain = p0.Y;
} else {
gain = p0.Y + (p1.Y - p0.Y) * (i - p0.X) / (p1.X - p0.X);
}
}
data[i] *= gain;
}
}

private static IList<Vector2> EnvelopeMsToSamples(IList<Vector2> envelope, int skipOverSamples) {
envelope = new List<Vector2>(envelope);
double shift = -envelope[0].X;
for (var i = 0; i < envelope.Count; i++) {
var point = envelope[i];
point.X = (float)((point.X + shift) * 44100 / 1000) + skipOverSamples;
point.Y /= 100;
envelope[i] = point;
}
return envelope;
}

private float[] GetHeadWindow(float[] samples, IList<Vector2> envelope, out int windowStart) {
var windowCenter = (envelope[0] + envelope[1]) * 0.5f;
windowStart = Math.Max((int)windowCenter.X - 440, 0);
Expand Down
Loading

0 comments on commit 7e460f5

Please sign in to comment.