Skip to content

Commit

Permalink
Add C# TTS API (#399)
Browse files Browse the repository at this point in the history
  • Loading branch information
LKZMuZiLi authored Oct 28, 2023
1 parent 157628b commit 4115f97
Show file tree
Hide file tree
Showing 7 changed files with 455 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using NAudio.Wave;

namespace TTS.Struct
{
// Playback half of SherpaOnnxGeneratedAudioResult: plays the generated audio through
// NAudio's WaveOutEvent using a BufferedWaveProvider that holds the whole utterance.
public sealed partial class SherpaOnnxGeneratedAudioResult
{
    private WaveOutEvent waveOut;
    private WaveFormat waveFormat;
    private BufferedWaveProvider bufferedWaveProvider;

    // Size in bytes of the PCM data queued for playback. Starts at 1 so that
    // PlayProgress never divides by zero before Play() has filled the buffer.
    private int bufferLength = 1;

    /// <summary>
    /// Duration of the audio currently held in the playback buffer, or
    /// <c>null</c> when <see cref="Play"/> has not been called yet.
    /// </summary>
    public TimeSpan? AudioDuration => bufferedWaveProvider?.BufferedDuration;

    /// <summary>
    /// Playback position reported by the output device divided by the size of the
    /// queued PCM data. Returns 0 before <see cref="Play"/> has been called
    /// (previously this threw because the nullable result had no value).
    /// The value can exceed 1 once the device has consumed all queued data.
    /// </summary>
    public float PlayProgress => waveOut is null ? 0f : (float)waveOut.GetPosition() / bufferLength;

    /// <summary>
    /// Starts (or resumes) playback. On the first call the generated samples are
    /// converted to 16-bit PCM and queued in a buffer sized to hold all of them.
    /// </summary>
    public void Play()
    {
        waveOut ??= new WaveOutEvent();

        waveFormat ??= new WaveFormat(sample_rate, AudioDataBit, Channels); // 16-bit PCM, mono

        if (bufferedWaveProvider == null)
        {
            var buffer = AudioByteData;
            bufferLength = buffer.Length;

            var provider = new BufferedWaveProvider(waveFormat);

            // BufferLength must be set BEFORE the first AddSamples call: the provider
            // allocates its circular buffer lazily on that call, and the default size
            // is only five seconds of audio, so longer utterances would be rejected
            // with "Buffer full".
            if (bufferLength > 0)
            {
                provider.BufferLength = bufferLength;
            }

            provider.AddSamples(buffer, 0, bufferLength);
            waveOut.Init(provider);

            // Publish the provider only after it is fully initialised, so a failure
            // above does not leave a half-configured player behind for the next call.
            bufferedWaveProvider = provider;
        }

        waveOut.Play();
    }

    /// <summary>
    /// Stops playback if it was ever started; does nothing otherwise.
    /// </summary>
    public void Stop()
    {
        waveOut?.Stop();
    }
}
}
62 changes: 62 additions & 0 deletions dotnet-examples/TTS/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
using System.Text;
using TTS;
using TTS.Struct;

/// <summary>
/// Console demo: reads a line of text, synthesises it with the VITS model and
/// echoes the characters to the console roughly in step with playback.
/// </summary>
internal class Program
{
    private static void Main(string[] args)
    {
        SherpaOnnxOfflineTtsConfig sherpaOnnxOfflineTtsConfig = new SherpaOnnxOfflineTtsConfig();
        sherpaOnnxOfflineTtsConfig.model = new SherpaOnnxOfflineTtsModelConfig
        {
            debug = 0,
            num_threads = 4,
            provider = "cpu",
            vits = new SherpaOnnxOfflineTtsVitsModelConfig
            {
                lexicon = "vits-zh-aishell3/lexicon.txt",
                model = "vits-zh-aishell3/vits-aishell3.onnx",
                tokens = "vits-zh-aishell3/tokens.txt",

                noise_scale = 0.667f,
                noise_scale_w = 0.8f,
                length_scale = 1,
            },
        };

        TTSCore i = new TTSCore(sherpaOnnxOfflineTtsConfig);

        Console.InputEncoding = Encoding.Unicode;
        Console.OutputEncoding = Encoding.UTF8;

        while (true)
        {
            var str = Console.ReadLine();

            // ReadLine returns null at end of input (closed or redirected stdin).
            if (str == null)
            {
                break;
            }

            // Nothing to speak, and the index arithmetic below needs at least one character.
            if (str.Length == 0)
            {
                continue;
            }

            // Dispose at the end of every iteration so the native audio buffer is
            // released promptly instead of waiting for the finalizer.
            using var audioResult = i.ToSpeech(str, 40, 1f);

            // audioResult.WriteWAVFile("123.wav"); // save to a local file

            audioResult.Play();

            int lastIndex = -1;
            while (true)
            {
                // Sample the progress once per iteration: playback advances on another
                // thread, so reading it separately for the loop condition and for the
                // index could yield a value above 1 and an index past the end of str.
                float progress = audioResult.PlayProgress;
                if (!(progress <= 1f))
                {
                    break; // finished (the negated comparison also exits on NaN)
                }

                int index = (int)(progress * (str.Length - 1));
                if (lastIndex != index)
                {
                    Console.Write(str[index]);
                    lastIndex = index;
                }
                Thread.Sleep(100);
            }

            if (++lastIndex < str.Length)
            {
                Console.Write(str[lastIndex]);
            }

            // Stop the output device; otherwise every utterance leaves one running behind.
            audioResult.Stop();

            Console.WriteLine();
        }
    }
}
198 changes: 198 additions & 0 deletions dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace TTS.Struct
{
/// <summary>
/// Result of a speech generation call. Wraps the native generated-audio handle
/// together with the sample pointer, sample count and sample rate copied from it,
/// and releases the native object on <see cref="Dispose"/> or finalization.
/// </summary>
public sealed partial class SherpaOnnxGeneratedAudioResult : IDisposable
{
    /// <summary>
    /// Name of the native library the P/Invoke declarations bind to.
    /// </summary>
    public const string Filename = "sherpa-onnx-c-api";

    // Upper bound on the scratch buffer used when converting samples to 16-bit PCM,
    // so the conversion never needs a full-size temporary array.
    private const int ConversionChunkSamples = 4096;

    /// <summary>
    /// Destroys the native generated-audio object and frees its unmanaged memory.
    /// </summary>
    /// <param name="ttsGenerateIntptr">Handle of the native generated-audio object.</param>
    [DllImport(Filename)]
    private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr ttsGenerateIntptr);

    [DllImport(Filename)]
    private static extern int SherpaOnnxWriteWave(IntPtr q, int n, int sample_rate, string filename);

    /// <summary>
    /// Bit depth of the PCM produced by <see cref="AudioByteData"/> and <see cref="ReadData(byte[], int)"/>.
    /// </summary>
    public const int AudioDataBit = 16;

    /// <summary>
    /// Number of audio channels (mono).
    /// </summary>
    public const int Channels = 1;

    /// <summary>
    /// Native handle; reset to <see cref="IntPtr.Zero"/> once released.
    /// </summary>
    internal IntPtr thisHandle;

    internal readonly IntPtr audioData;
    internal readonly int dataSize;

    /// <summary>
    /// Sample rate of the generated audio.
    /// </summary>
    public readonly int sample_rate;

    // Set once the native object has been released; audioData is dangling afterwards.
    private bool disposed;

    /// <summary>
    /// Pointer to the native float samples. Not valid after <see cref="Dispose"/>.
    /// </summary>
    public IntPtr AudioDataIntPtr => audioData;

    /// <summary>
    /// Number of float samples in the native buffer.
    /// </summary>
    public int AudioDataLength => dataSize;

    /// <summary>
    /// Copies the native samples into a newly allocated <c>float[]</c>.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The native audio has already been released.</exception>
    public float[] AudioFloatData
    {
        get
        {
            ThrowIfDisposed();

            float[] floatAudioData = new float[AudioDataLength];
            if (floatAudioData.Length > 0)
            {
                Marshal.Copy(audioData, floatAudioData, 0, floatAudioData.Length);
            }
            return floatAudioData;
        }
    }

    /// <summary>
    /// Converts the samples to 16-bit PCM in a newly allocated <c>byte[]</c>
    /// (two bytes per sample, low byte first).
    /// </summary>
    /// <exception cref="ObjectDisposedException">The native audio has already been released.</exception>
    public byte[] AudioByteData
    {
        get
        {
            byte[] bytes = new byte[AudioDataLength * 2];
            ReadData(bytes, 0);
            return bytes;
        }
    }

    internal SherpaOnnxGeneratedAudioResult(IntPtr intPtr, SherpaOnnxGeneratedAudio sherpaOnnx)
    {
        this.thisHandle = intPtr;
        this.audioData = sherpaOnnx.audioData;
        this.dataSize = sherpaOnnx.dataSize;
        this.sample_rate = sherpaOnnx.sample_rate;
    }

    ~SherpaOnnxGeneratedAudioResult()
    {
        ReleaseNativeAudio();
    }

    /// <summary>
    /// Copies float samples into a caller-supplied array, so no new array is allocated.
    /// </summary>
    /// <param name="audioFloats">Destination array.</param>
    /// <param name="offset">Index in <paramref name="audioFloats"/> at which writing starts.</param>
    /// <returns>Number of samples written (limited by the space available after <paramref name="offset"/>).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioFloats"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is outside the array.</exception>
    /// <exception cref="ObjectDisposedException">The native audio has already been released.</exception>
    public int ReadData(float[] audioFloats, int offset)
    {
        ValidateDestination(audioFloats, offset, nameof(audioFloats));
        ThrowIfDisposed();

        int count = Math.Min(AudioDataLength, audioFloats.Length - offset);
        if (count > 0)
        {
            Marshal.Copy(audioData, audioFloats, offset, count);
        }
        return count;
    }

    /// <summary>
    /// Converts the samples to 16-bit PCM (low byte first) and writes them into a
    /// caller-supplied array. Only a small pooled scratch buffer is used, so no
    /// full-size temporary array is allocated.
    /// </summary>
    /// <param name="audioFloats">Destination array; <c>AudioDataLength * 2</c> bytes hold all samples.</param>
    /// <param name="offset">Index in <paramref name="audioFloats"/> at which writing starts.</param>
    /// <returns>Number of bytes written (always even).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioFloats"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is outside the array.</exception>
    /// <exception cref="ObjectDisposedException">The native audio has already been released.</exception>
    public int ReadData(byte[] audioFloats, int offset)
    {
        ValidateDestination(audioFloats, offset, nameof(audioFloats));
        ThrowIfDisposed();

        // Each sample takes two bytes; integer division drops a trailing odd byte.
        int sampleCount = Math.Min(AudioDataLength, (audioFloats.Length - offset) / 2);
        if (sampleCount <= 0)
        {
            return 0;
        }

        float[] scratch = System.Buffers.ArrayPool<float>.Shared.Rent(Math.Min(sampleCount, ConversionChunkSamples));
        try
        {
            int converted = 0;
            while (converted < sampleCount)
            {
                int chunk = Math.Min(scratch.Length, sampleCount - converted);
                IntPtr source = new IntPtr(audioData.ToInt64() + (long)converted * sizeof(float));
                Marshal.Copy(source, scratch, 0, chunk);

                for (int s = 0; s < chunk; s++)
                {
                    short value = ToPcm16(scratch[s]);

                    audioFloats[offset++] = (byte)value;
                    audioFloats[offset++] = (byte)(value >> 8);
                }

                converted += chunk;
            }
        }
        finally
        {
            System.Buffers.ArrayPool<float>.Shared.Return(scratch);
        }

        return sampleCount * 2;
    }

    /// <summary>
    /// Writes the audio to a WAV file through the native library.
    /// </summary>
    /// <param name="filename">Path of the file to write.</param>
    /// <returns><c>true</c> when the native call returns 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filename"/> is null.</exception>
    /// <exception cref="ObjectDisposedException">The native audio has already been released.</exception>
    public bool WriteWAVFile(string filename)
    {
        if (filename == null)
        {
            throw new ArgumentNullException(nameof(filename));
        }
        ThrowIfDisposed();

        return 1 == SherpaOnnxWriteWave(audioData, this.dataSize, this.sample_rate, filename);
    }

    /// <summary>
    /// Releases the native audio object. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        ReleaseNativeAudio();
        GC.SuppressFinalize(this);
    }

    // Frees the native object at most once; shared by Dispose and the finalizer.
    private void ReleaseNativeAudio()
    {
        disposed = true;

        IntPtr handle = this.thisHandle;
        if (handle != IntPtr.Zero)
        {
            // Clear the field first so a failure in the native call cannot lead to a double free.
            this.thisHandle = IntPtr.Zero;
            SherpaOnnxDestroyOfflineTtsGeneratedAudio(handle);
        }
    }

    // Guards every member that dereferences audioData, which dangles after release.
    private void ThrowIfDisposed()
    {
        if (disposed)
        {
            throw new ObjectDisposedException(nameof(SherpaOnnxGeneratedAudioResult));
        }
    }

    // Rejects a null destination or an offset that does not lie within it.
    private static void ValidateDestination(Array destination, int offset, string destinationName)
    {
        if (destination == null)
        {
            throw new ArgumentNullException(destinationName);
        }
        if (offset < 0 || offset > destination.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
    }

    // Scales a float sample to 16-bit PCM, saturating instead of wrapping when the
    // scaled value falls outside the short range; NaN becomes silence.
    private static short ToPcm16(float sample)
    {
        float scaled = sample * short.MaxValue;
        if (float.IsNaN(scaled))
        {
            return 0;
        }
        if (scaled >= short.MaxValue)
        {
            return short.MaxValue;
        }
        if (scaled <= short.MinValue)
        {
            return short.MinValue;
        }
        return (short)scaled;
    }
}

// Sequential-layout value type whose fields SherpaOnnxGeneratedAudioResult copies in its
// constructor. The field order is part of the interop layout and must not be rearranged.
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxGeneratedAudio
{
// Pointer to the generated samples; the wrapper class copies them out as 32-bit floats.
internal readonly IntPtr audioData;
// Number of samples available at audioData.
internal readonly int dataSize;

/// <summary>
/// Sample rate of the generated audio.
/// </summary>
public readonly int sample_rate;
}
}
10 changes: 10 additions & 0 deletions dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using System.Runtime.InteropServices;

namespace TTS.Struct
{
/// <summary>
/// Top-level offline TTS configuration. Laid out sequentially; it currently
/// carries only the model configuration.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SherpaOnnxOfflineTtsConfig
{
/// <summary>
/// Model configuration (VITS model files, thread count, debug flag, provider).
/// </summary>
public SherpaOnnxOfflineTtsModelConfig model;
}
}
23 changes: 23 additions & 0 deletions dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System.Runtime.InteropServices;

namespace TTS.Struct
{
/// <summary>
/// Model-level TTS settings. Laid out sequentially for interop, so the field
/// order must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SherpaOnnxOfflineTtsModelConfig
{
/// <summary>
/// VITS model configuration.
/// </summary>
public SherpaOnnxOfflineTtsVitsModelConfig vits;
/// <summary>
/// Number of threads.
/// </summary>
public int num_threads;
// Debug flag passed through as an int; the demo program sets it to 0.
// NOTE(review): presumably non-zero enables native debug output - confirm against the C API.
public int debug;
/// <summary>
/// Execution provider name; the demo program uses "cpu". Marshalled as an ANSI string.
/// </summary>
[MarshalAs(UnmanagedType.LPStr)]
public string provider;
}
}
48 changes: 48 additions & 0 deletions dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using System.Runtime.InteropServices;

namespace TTS.Struct
{
/// <summary>
/// VITS model settings. Laid out sequentially for interop, so the field order
/// must not be changed. All defaults are assigned in the parameterless
/// constructor; <c>default(SherpaOnnxOfflineTtsVitsModelConfig)</c> leaves every
/// field zeroed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SherpaOnnxOfflineTtsVitsModelConfig
{
    /// <summary>
    /// Path of the model file, e.g. "vits-zh-aishell3/vits-aishell3.onnx".
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string model;

    /// <summary>
    /// Path of the lexicon file, e.g. "vits-zh-aishell3/lexicon.txt".
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string lexicon;

    /// <summary>
    /// Path of the tokens file, e.g. "vits-zh-aishell3/tokens.txt".
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string tokens;

    /// <summary>
    /// noise_scale of the VITS model (float, default = 0.667).
    /// </summary>
    public float noise_scale;

    /// <summary>
    /// noise_scale_w of the VITS model (float, default = 0.8).
    /// </summary>
    public float noise_scale_w;

    /// <summary>
    /// Speech speed: larger is slower, smaller is faster (float, default = 1).
    /// </summary>
    public float length_scale;

    /// <summary>
    /// Creates a configuration that points at the vits-zh-aishell3 model files and
    /// uses the default noise and length scales.
    /// </summary>
    public SherpaOnnxOfflineTtsVitsModelConfig()
    {
        model = "vits-zh-aishell3/vits-aishell3.onnx";
        lexicon = "vits-zh-aishell3/lexicon.txt";
        tokens = "vits-zh-aishell3/tokens.txt";

        noise_scale = 0.667f;
        noise_scale_w = 0.8f;
        length_scale = 1f;
    }
}
}
Loading

0 comments on commit 4115f97

Please sign in to comment.