diff --git a/examples/api/main.py b/examples/api/main.py index a1bb568f9..c213735b6 100644 --- a/examples/api/main.py +++ b/examples/api/main.py @@ -17,7 +17,7 @@ import ChatTTS -from tools.audio import wav_arr_to_mp3_view +from tools.audio import pcm_arr_to_mp3_view from tools.logger import get_logger import torch @@ -101,7 +101,7 @@ async def generate_voice(params: ChatTTSParams): buf, "a", compression=zipfile.ZIP_DEFLATED, allowZip64=False ) as f: for idx, wav in enumerate(wavs): - f.writestr(f"{idx}.mp3", wav_arr_to_mp3_view(wav)) + f.writestr(f"{idx}.mp3", pcm_arr_to_mp3_view(wav)) logger.info("Audio generation successful.") buf.seek(0) diff --git a/examples/cmd/run.py b/examples/cmd/run.py index b9e82b09d..8a2bb68b1 100644 --- a/examples/cmd/run.py +++ b/examples/cmd/run.py @@ -13,14 +13,14 @@ import ChatTTS -from tools.audio import wav_arr_to_mp3_view +from tools.audio import pcm_arr_to_mp3_view from tools.logger import get_logger logger = get_logger("Command") def save_mp3_file(wav, index): - data = wav_arr_to_mp3_view(wav) + data = pcm_arr_to_mp3_view(wav) mp3_filename = f"output_audio_{index}.mp3" with open(mp3_filename, "wb") as f: f.write(data) diff --git a/examples/web/funcs.py b/examples/web/funcs.py index c173b6272..52a569f74 100644 --- a/examples/web/funcs.py +++ b/examples/web/funcs.py @@ -4,7 +4,6 @@ from time import sleep import gradio as gr -import numpy as np from tools.audio import float_to_int16, has_ffmpeg_installed from tools.logger import get_logger diff --git a/examples/web/webui.py b/examples/web/webui.py index 800618a52..f8863a449 100644 --- a/examples/web/webui.py +++ b/examples/web/webui.py @@ -155,6 +155,9 @@ def make_audio(autoplay, stream): streaming=stream, interactive=False, show_label=True, + waveform_options=gr.WaveformOptions( + sample_rate=24000, + ), ) generate_button.click( fn=set_buttons_before_generate, diff --git a/tools/audio/__init__.py b/tools/audio/__init__.py index f9c785735..5b9f03bc4 100644 --- a/tools/audio/__init__.py +++ b/tools/audio/__init__.py @@ -1,3 +1,3 @@ -from .mp3 import wav_arr_to_mp3_view +from .pcm import pcm_arr_to_mp3_view from .ffmpeg import has_ffmpeg_installed from .np import float_to_int16 diff --git a/tools/audio/mp3.py b/tools/audio/pcm.py similarity index 91% rename from tools/audio/mp3.py rename to tools/audio/pcm.py index f08793cc1..0b1927591 100644 --- a/tools/audio/mp3.py +++ b/tools/audio/pcm.py @@ -7,7 +7,7 @@ from .av import wav2 -def wav_arr_to_mp3_view(wav: np.ndarray): +def pcm_arr_to_mp3_view(wav: np.ndarray): buf = BytesIO() with wave.open(buf, "wb") as wf: wf.setnchannels(1) # Mono channel