1.3.0 ( 8/* - Renewal Process... )

Bebra777228 · Nov 13, 2024 · 636a9e2 · 636a9e2
1 parent d3c3b85
commit 636a9e2
Show file tree

Hide file tree

Showing 8 changed files with 107 additions and 248 deletions.
diff --git a/app.py b/app.py
@@ -4,7 +4,7 @@
 import gradio as gr
 
 from tabs.edge_tts.edge_tts import edge_tts_tab
-from tabs.inference.inference_batch import inference_batch_tab
+#from tabs.inference.inference_batch import inference_batch_tab
 from tabs.inference.inference_single import inference_single_tab
 from tabs.install.install import (
     files_upload,
@@ -40,8 +40,8 @@
     with gr.Tab("Преобразование голоса"):
         with gr.Tab("Одиночное преобразование"):
             inference_single_tab()
-        with gr.Tab("Пакетное преобразование"):
-            inference_batch_tab()
+        #with gr.Tab("Пакетное преобразование"):
+        #    inference_batch_tab()
 
     with gr.Tab("Преобразование текста в речь"):
         edge_tts_tab()

diff --git a/app_offline.py b/app_offline.py
@@ -3,7 +3,7 @@
 
 import gradio as gr
 
-from tabs.inference.inference_batch import inference_batch_tab
+#from tabs.inference.inference_batch import inference_batch_tab
 from tabs.inference.inference_single import inference_single_tab
 from tabs.install.install import files_upload, output_message, zip_upload
 from tabs.welcome import welcome_tab
@@ -32,8 +32,8 @@
     with gr.Tab("Преобразование голоса"):
         with gr.Tab("Одиночное преобразование"):
             inference_single_tab()
-        with gr.Tab("Пакетное преобразование"):
-            inference_batch_tab()
+        #with gr.Tab("Пакетное преобразование"):
+        #    inference_batch_tab()
 
     with gr.Tab("Загрузка RVC моделей"):
         zip_upload(output_message_component)

diff --git a/rvc/cli/edge_tts_cli.py b/rvc/cli/edge_tts_cli.py
@@ -1,9 +1,6 @@
 import argparse
 import os
-
-from tabs.edge_tts.edge_tts import edge_tts_pipeline
-
-rvc_models_dir = os.path.join(os.getcwd(), "models")
+from rvc.infer.infer import rvc_infer, RVC_MODELS_DIR
 
 parser = argparse.ArgumentParser(
     description="Замена голоса в директории output/", add_help=True
@@ -18,31 +15,32 @@
 parser.add_argument("-f0", "--method", type=str, default="rmvpe+")
 parser.add_argument("-hop", "--hop_length", type=int, default=128)
 parser.add_argument("-pro", "--protect", type=float, default=0.33)
-parser.add_argument("-f0min", "--f0_min", type=int, default="50")
-parser.add_argument("-f0max", "--f0_max", type=int, default="1100")
+parser.add_argument("-f0min", "--f0_min", type=int, default=50)
+parser.add_argument("-f0max", "--f0_max", type=int, default=1100)
 parser.add_argument("-f", "--format", type=str, default="mp3")
 args = parser.parse_args()
 
 model_name = args.model_name
-if not os.path.exists(os.path.join(rvc_models_dir, model_name)):
+if not os.path.exists(os.path.join(RVC_MODELS_DIR, model_name)):
     raise Exception(
         f"\033[91mМодели {model_name} не существует. Возможно, вы неправильно ввели имя.\033[0m"
     )
 
-cover_path = edge_tts_pipeline(
-    text=args.text_input,
+cover_path = rvc_infer(
     voice_model=model_name,
-    voice=args.tts_voice,
-    pitch=args.pitch,
+    input_path_or_text=args.text_input,
     index_rate=args.index_rate,
+    pitch=args.pitch,
+    f0_method=args.method,
     filter_radius=args.filter_radius,
     volume_envelope=args.volume_envelope,
-    f0_method=args.method,
-    hop_length=args.hop_length,
     protect=args.protect,
+    hop_length=args.hop_length,
     f0_min=args.f0_min,
     f0_max=args.f0_max,
     output_format=args.format,
+    is_tts=True,
+    voice=args.tts_voice,
 )
 
 print("\033[1;92m\nГолос успешно заменен!\n\033[0m")
diff --git a/rvc/cli/rvc_cli.py b/rvc/cli/rvc_cli.py
@@ -1,9 +1,6 @@
 import argparse
 import os
-
-from tabs.inference.inference_single import voice_pipeline_single
-
-rvc_models_dir = os.path.join(os.getcwd(), "models")
+from rvc.infer.infer import rvc_infer, RVC_MODELS_DIR
 
 parser = argparse.ArgumentParser(
     description="Замена голоса в директории output/", add_help=True
@@ -17,27 +14,27 @@
 parser.add_argument("-f0", "--method", type=str, default="rmvpe+")
 parser.add_argument("-hop", "--hop_length", type=int, default=128)
 parser.add_argument("-pro", "--protect", type=float, default=0.33)
-parser.add_argument("-f0min", "--f0_min", type=int, default="50")
-parser.add_argument("-f0max", "--f0_max", type=int, default="1100")
+parser.add_argument("-f0min", "--f0_min", type=int, default=50)
+parser.add_argument("-f0max", "--f0_max", type=int, default=1100)
 parser.add_argument("-f", "--format", type=str, default="mp3")
 args = parser.parse_args()
 
 model_name = args.model_name
-if not os.path.exists(os.path.join(rvc_models_dir, model_name)):
+if not os.path.exists(os.path.join(RVC_MODELS_DIR, model_name)):
     raise Exception(
         f"\033[91mМодели {model_name} не существует. Возможно, вы неправильно ввели имя.\033[0m"
     )
 
-cover_path = voice_pipeline_single(
-    uploaded_file=args.song_input,
+cover_path = rvc_infer(
     voice_model=model_name,
-    pitch=args.pitch,
+    input_path_or_text=args.song_input,
     index_rate=args.index_rate,
+    pitch=args.pitch,
+    f0_method=args.method,
     filter_radius=args.filter_radius,
     volume_envelope=args.volume_envelope,
-    f0_method=args.method,
-    hop_length=args.hop_length,
     protect=args.protect,
+    hop_length=args.hop_length,
     f0_min=args.f0_min,
     f0_max=args.f0_max,
     output_format=args.format,

diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
@@ -1,29 +1,28 @@
-import gc
 import os
-
+import gc
+import torch
 import librosa
 import numpy as np
 import soundfile as sf
-import torch
 from fairseq import checkpoint_utils
 from pydub import AudioSegment
 from scipy.io import wavfile
+import asyncio
+import edge_tts
+import gradio as gr
 
-from rvc.infer.config import Config
-from rvc.infer.pipeline import VC
 from rvc.lib.algorithm.synthesizers import Synthesizer
 from rvc.lib.my_utils import load_audio
+from rvc.infer.config import Config
+from rvc.infer.pipeline import VC
 
 # Инициализация конфигурации
 config = Config()
 
-
+# Определение путей к директориям с моделями
 RVC_MODELS_DIR = os.path.join(os.getcwd(), "models")
-EMBEDDERS_DIR = os.path.join(os.getcwd(), "rvc", "models", "embedders")
-HUBERT_BASE_PATH = os.path.join(EMBEDDERS_DIR, "hubert_base.pt")
-
-os.makedirs(EMBEDDERS_DIR, exist_ok=True)
-os.makedirs(RVC_MODELS_DIR, exist_ok=True)
+HUBERT_PATH = os.path.join(os.getcwd(), "rvc", "models", "embedders", "hubert_base.pt")
+OUTPUT_DIR = os.path.join(os.getcwd(), "output", "converted_audio")
 
 
 # Загружает модель RVC и индекс по имени модели.
@@ -104,6 +103,12 @@ def get_vc(model_path):
     return cpt, version, net_g, tgt_sr, vc
 
 
+# Функция для синтеза речи с помощью Edge TTS
+async def text_to_speech(text, voice, output_path):
+    communicate = edge_tts.Communicate(text=text, voice=voice)
+    await communicate.save(output_path)
+
+
 # Конвертирует аудиофайл в стерео формат.
 def convert_to_stereo(input_path, output_path):
     # Загружаем аудиофайл
@@ -120,19 +125,22 @@ def convert_to_stereo(input_path, output_path):
 
 
 # Конвертирует аудиофайл в выбранный пользователем формат.
-def convert_to_user_format(input_path, output_path, output_format):
+def convert_to_user_format(input_path, base_name, output_format):
     # Загружаем аудиофайл
     audio = AudioSegment.from_file(input_path)
+
+    output_name = os.path.splitext(os.path.basename(base_name))[0]
+    output_path = os.path.join(OUTPUT_DIR, f"{output_name}_(Converted).{output_format}")
 
     # Сохраняем аудиофайл в выбранном формате
     audio.export(output_path, format=output_format)
+    return output_path
 
 
 # Выполняет инференс с использованием RVC
 def rvc_infer(
     voice_model,
-    input_path,
-    output_dir,
+    input_path_or_text,
     index_rate,
     pitch,
     f0_method,
@@ -143,24 +151,41 @@ def rvc_infer(
     f0_min,
     f0_max,
     output_format,
+    is_tts=False,
+    voice=None,
+    progress=gr.Progress(track_tqdm=True),
 ):
+    progress(0, "Запуск конвейера генерации...")
+
     # Загружаем модель Hubert
-    hubert_model = load_hubert(HUBERT_BASE_PATH)
+    progress(0.1, "Загрузка Hubert модели...")
+    hubert_model = load_hubert(HUBERT_PATH)
+
     # Загружаем модель RVC и индекс
+    progress(0.2, "Загрузка RVC модели...")
     model_path, index_path = load_rvc_model(voice_model)
+
     # Получаем конвертер голоса
+    progress(0.3, "Получение конвертера голоса...")
     cpt, version, net_g, tgt_sr, vc = get_vc(model_path)
+
+    if is_tts:
+        # Синтез речи с помощью Edge TTS
+        progress(0.4, "Синтез речи...")
+        tts_voice_path = os.path.join(OUTPUT_DIR, "TTS_Voice.wav")
+        asyncio.run(text_to_speech(input_path_or_text, voice, tts_voice_path))
+        output_path = os.path.join(OUTPUT_DIR, f"TTS_Voice_Converted.wav")
+        input_path = tts_voice_path
+    else:
+        output_path = os.path.join(OUTPUT_DIR, f"Voice_Converted.wav")
+        input_path = input_path_or_text
+
     # Загружаем аудиофайл
+    progress(0.5, "Загрузка аудиофайла...")
     audio = load_audio(input_path, 16000)
 
-    # Формируем имя выходного файла
-    base_name = os.path.splitext(os.path.basename(input_path))[0]
-    temp_output_path = os.path.join(output_dir, f"{base_name}_(Converted).flac")
-    final_output_path = os.path.join(
-        output_dir, f"{base_name}_(Converted).{output_format}"
-    )
-
     # Выполняем конвертацию голоса
+    progress(0.6, "Преобразование голоса...")
     audio_opt = vc.pipeline(
         hubert_model,
         net_g,
@@ -181,84 +206,25 @@ def rvc_infer(
         f0_max,
         f0_file=None,
     )
-    # Сохраняем результат в файл
-    wavfile.write(temp_output_path, tgt_sr, audio_opt)
 
-    # Конвертируем файл в стерео формат с использованием scipy
-    convert_to_stereo(temp_output_path, temp_output_path)
+    # Сохраняем результат в файл
+    progress(0.7, "Сохранение результата...")
+    wavfile.write(output_path, tgt_sr, audio_opt)
 
-    # Конвертируем файл в выбранный пользователем формат
-    convert_to_user_format(temp_output_path, final_output_path, output_format)
+    # Конвертируем файл в стерео и в выбранный формат
+    progress(0.8, "Подготовка аудио выводу...")
+    convert_to_stereo(output_path, output_path)
+    final_output_path = convert_to_user_format(output_path, input_path, output_format)
 
     # Удаляем временный файл
-    os.remove(temp_output_path)
+    if os.path.exists(output_path):
+        os.remove(output_path)
 
     # Освобождаем память
+    progress(0.9, "Освобождение памяти...")
     del hubert_model, cpt, net_g, vc
     gc.collect()
     torch.cuda.empty_cache()
 
-    return final_output_path  # Возвращаем путь к выходному файлу
-
-
-# Выполняет пакетное преобразование файлов с использованием rvc_infer
-def rvc_infer_batch(
-    voice_model,
-    input_dir,
-    output_dir,
-    index_rate,
-    pitch,
-    f0_method,
-    filter_radius,
-    volume_envelope,
-    protect,
-    hop_length,
-    f0_min,
-    f0_max,
-    output_format,
-):
-    # Получаем список файлов в директории input_dir
-    input_files = [
-        f
-        for f in os.listdir(input_dir)
-        if f.endswith(
-            (
-                "wav",
-                "mp3",
-                "flac",
-                "ogg",
-                "opus",
-                "m4a",
-                "mp4",
-                "aac",
-                "alac",
-                "wma",
-                "aiff",
-                "webm",
-                "ac3",
-            )
-        )
-    ]
-
-    for input_file in input_files:
-        # Формируем пути к входному и выходному файлам
-        input_path = os.path.join(input_dir, input_file)
-
-        print(f"Преобразование {input_file}...")
-
-        # Выполняем преобразование для текущего файла
-        rvc_infer(
-            voice_model,
-            input_path,
-            output_dir,
-            index_rate,
-            pitch,
-            f0_method,
-            filter_radius,
-            volume_envelope,
-            protect,
-            hop_length,
-            f0_min,
-            f0_max,
-            output_format,
-        )
+    progress(1, "Готово!")
+    return tts_voice_path if is_tts else None, final_output_path