Skip to content

Commit

Permalink
Improvements to loading dubbing sessions
Browse files Browse the repository at this point in the history
  • Loading branch information
lukaszliniewicz authored Nov 7, 2024
1 parent 9bed7f5 commit d2d4918
Showing 1 changed file with 131 additions and 40 deletions.
171 changes: 131 additions & 40 deletions pandrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2054,7 +2054,6 @@ def refresh_audio_tracks(self):
except ffmpeg.Error as e:
messagebox.showerror("FFmpeg Error", f"An error occurred while probing the video file: {str(e)}")


def add_dubbing_to_video(self):
if not self.session_name.get():
CTkMessagebox(title="No Session", message="Please create or load a session before adding dubbing to video.", icon="info")
Expand All @@ -2063,6 +2062,28 @@ def add_dubbing_to_video(self):
session_name = self.session_name.get()
session_dir = os.path.abspath(os.path.join("Outputs", session_name))

# Files to check for and remove (EXCLUDING Sentence_wavs)
files_to_remove = [
"final_output.mp4", "original_audio.wav", "aligned_audio.wav",
"amplified_dubbed_audio.wav", "mixed_audio.wav"
] + [f for f in os.listdir(session_dir) if f.endswith("_final.mp4") or f.endswith("_equalized.srt")]

files_not_removed = []
for file_pattern in files_to_remove:
filepath = os.path.join(session_dir, file_pattern)
if os.path.exists(filepath):
try:
os.remove(filepath)
logging.info(f"Removed file: {filepath}")
except OSError as e:
files_not_removed.append(filepath)
logging.error(f"Could not remove {filepath}: {e}")

if files_not_removed:
message = "Could not remove the following files. Please close any programs using them and try again:\n" + "\n".join(files_not_removed)
CTkMessagebox(title="File Removal Error", message=message, icon="warning")
return

# Check if the required elements are present in the session folder
video_files = [f for f in os.listdir(session_dir) if f.lower().endswith(('.mp4', '.mkv', '.webm', '.avi', '.mov'))]
speech_blocks_files = [f for f in os.listdir(session_dir) if f.lower().endswith('_speech_blocks.json')]
Expand All @@ -2083,9 +2104,16 @@ def add_dubbing_to_video(self):
logging.info(f"Executing synchronization command: {' '.join(subdub_command)}")

try:
process = subprocess.Popen(subdub_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, encoding='utf-8', errors='replace')
process = subprocess.Popen(
subdub_command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
encoding='utf-8',
errors='replace'
)
for line in process.stdout:
print(line, end='')
print(line, end='') # Optionally, integrate with your GUI's output
logging.info(line.strip())
process.wait()
if process.returncode != 0:
Expand Down Expand Up @@ -2125,9 +2153,16 @@ def add_dubbing_to_video(self):
logging.info(f"Executing equalization command: {' '.join(equalize_command)}")

try:
process = subprocess.Popen(equalize_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, encoding='utf-8', errors='replace')
process = subprocess.Popen(
equalize_command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
encoding='utf-8',
errors='replace'
)
for line in process.stdout:
print(line, end='')
print(line, end='') # Optionally, integrate with your GUI's output
logging.info(line.strip())
process.wait()
if process.returncode != 0:
Expand All @@ -2149,30 +2184,52 @@ def add_dubbing_to_video(self):
return
equalized_srt_path = os.path.join(session_dir, equalized_srt_files[0])

# Add the equalized subtitles to the synced video
# Add the equalized subtitles to the synced video using FFmpeg
output_video_path = os.path.join(session_dir, f"{session_name}_final.mp4")
ffmpeg_command = [
"ffmpeg",
"-y", # Overwrite output file if it exists
"-i", synced_video_path,
"-i", equalized_srt_path,
"-c", "copy",
"-c:s", "mov_text",
"-metadata:s:s:0", "language=eng", # Optional: Set subtitle language
output_video_path
]

logging.info(f"Executing FFmpeg command to add subtitles: {' '.join(ffmpeg_command)}")

try:
result = subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True, encoding='utf-8', errors='replace')
logging.info("Subtitles added successfully.")
CTkMessagebox(title="Success", message=f"Dubbing and subtitles have been added. The final video is available at: {output_video_path}")
ffmpeg_process = subprocess.Popen(
ffmpeg_command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
encoding='utf-8',
errors='replace'
)
for line in ffmpeg_process.stdout:
print(line, end='') # Optionally, integrate with your GUI's output
logging.info(line.strip())
ffmpeg_process.wait()
if ffmpeg_process.returncode != 0:
raise subprocess.CalledProcessError(ffmpeg_process.returncode, ffmpeg_command)
logging.info("Subtitles have been successfully embedded into the final video.")

# Notify the user of success
CTkMessagebox(
title="Success",
message=f"Dubbing and subtitles have been added. The final video is available at:\n{output_video_path}"
)

except subprocess.CalledProcessError as e:
logging.error(f"Failed to add subtitles: {e.stderr}")
CTkMessagebox(title="Error", message=f"Failed to add subtitles: {e.stderr}")
logging.error(f"Failed to add subtitles: {e.output}")
CTkMessagebox(title="Error", message=f"Failed to add subtitles: {e.output}")
except Exception as e:
logging.error(f"An unexpected error occurred while adding subtitles: {str(e)}")
CTkMessagebox(title="Error", message=f"An unexpected error occurred while adding subtitles: {str(e)}")


def generate_dubbing_audio(self):
if not self.session_name.get():
CTkMessagebox(title="No Session", message="Please create or load a session before generating dubbing audio.", icon="info")
Expand Down Expand Up @@ -2284,9 +2341,27 @@ def only_transcribe(self):
gpu_name = torch.cuda.get_device_name(0).lower()
pascal_gpus = ['1060', '1070', '1080', '1660', '1650']
use_int8 = any(gpu in gpu_name for gpu in pascal_gpus)

def run_whisperx_command(command):
try:
whisperx_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
stdout, stderr = whisperx_process.communicate()

logging.info(f"Whisperx stdout: {stdout}")
if stderr:
logging.error(f"Whisperx stderr: {stderr}")

if whisperx_process.returncode != 0:
if "expects each tensor to be equal size" in stderr:
return False
raise subprocess.CalledProcessError(whisperx_process.returncode, command, stderr)
return True
except subprocess.CalledProcessError as e:
if "expects each tensor to be equal size" in str(e.stderr):
return False
raise e

# Transcription using the WAV file
output_srt = os.path.join(session_dir, f"{video_filename}.srt")
# Initial whisperx command
whisperx_command = [
"../conda/Scripts/conda.exe", "run", "-p", "../conda/envs/whisperx_installer", "--no-capture-output",
"python", "-m", "whisperx",
Expand All @@ -2301,17 +2376,17 @@ def only_transcribe(self):
if use_int8:
whisperx_command.extend(["--compute_type", "int8"])

logging.info(f"Executing transcription command: {' '.join(whisperx_command)}")

whisperx_process = subprocess.Popen(whisperx_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
stdout, stderr = whisperx_process.communicate()

logging.info(f"Whisperx stdout: {stdout}")
if stderr:
logging.error(f"Whisperx stderr: {stderr}")
logging.info(f"Executing initial transcription command: {' '.join(whisperx_command)}")

if whisperx_process.returncode != 0:
raise subprocess.CalledProcessError(whisperx_process.returncode, whisperx_command, stderr)
# Try initial transcription
if not run_whisperx_command(whisperx_command):
# If failed, retry with batch_size 1
logging.info("Initial transcription failed. Retrying with batch_size 1")
whisperx_command.extend(["--batch_size", "1"])
logging.info(f"Executing fallback transcription command: {' '.join(whisperx_command)}")

if not run_whisperx_command(whisperx_command):
raise Exception("Transcription failed even with batch_size 1")

logging.info("Transcription completed successfully.")

Expand Down Expand Up @@ -2480,17 +2555,16 @@ def select_video_file(self):
session_name = self.session_name.get()
session_dir = os.path.join("Outputs", session_name)

# Ensure the session directory exists
os.makedirs(session_dir, exist_ok=True)

# Get the filename of the selected video
# Get the target path in the session directory
video_filename = os.path.basename(video_file)

# Copy the video file to the session directory
destination_path = os.path.join(session_dir, video_filename)
shutil.copy(video_file, destination_path)

# Update the selected video file entry
# Only copy if the file isn't already in the session directory
if os.path.dirname(os.path.abspath(video_file)) != os.path.abspath(session_dir):
os.makedirs(session_dir, exist_ok=True)
shutil.copy(video_file, destination_path)

# Update the selected video file entry with the path
self.selected_video_file.set(destination_path)


Expand Down Expand Up @@ -4004,28 +4078,45 @@ def start_optimisation(self, total_sentences, current_sentence=0):

# Update the remaining time label
self.master.after(0, self.update_remaining_time_label, estimated_remaining_time)
# Save the final concatenated audio file only if the source file is not an srt file
if self.enable_dubbing.get() and self.source_file.endswith(".srt"):
self.start_dubbing()

# Check if this is a dubbing workflow - check both source and pre_selected for srt and video files
is_dubbing_workflow = (self.pre_selected_source_file and
self.pre_selected_source_file.lower().endswith(
(".srt", ".mp4", ".mkv", ".webm", ".avi", ".mov")
))

# Calculate total generation time
total_generation_time = sum(sentence_generation_times)
formatted_time = str(datetime.timedelta(seconds=int(total_generation_time)))

if is_dubbing_workflow:
CTkMessagebox(
title="Generation Finished",
message=(f"Speech generation completed!\n\n"
f"Total Generation Time: {formatted_time}\n\n"
"Click 'Add Dubbing to Video' to create the final dubbed video with subtitles "
"once you reviewed the generated audio and are happy with the results."),
icon="info"
)
else:
# Regular workflow - save concatenated audio
session_name = self.session_name.get()
output_format = self.output_format.get()
session_dir = os.path.join("Outputs", session_name)
default_output_path = os.path.join(session_dir, f"{session_name}.{output_format}")

# Call save_output with a default path
final_output_path = self.save_output(auto_path=default_output_path)

if final_output_path:
logging.info(f"The output file has been saved as {final_output_path}")
CTkMessagebox(
title="Generation Finished",
message=f"Generation completed!\n\nTotal Generation Time: {formatted_time}",
icon="info"
)
else:
logging.warning("Failed to save the output file")

# Calculate and display the total generation time
total_generation_time = sum(sentence_generation_times)
formatted_time = str(datetime.timedelta(seconds=int(total_generation_time)))
CTkMessagebox(title="Generation Finished", message=f"Generation completed!\n\nTotal Generation Time: {formatted_time}", icon="info")

def save_sentence_to_json(self, preprocessed_sentences, json_filename, sentence_index, sentence_dict):
# Update the tts_generated flag for the current sentence
sentence_dict["tts_generated"] = "yes"
Expand Down

4 comments on commit d2d4918

@JohnF51
Copy link

@JohnF51 JohnF51 commented on d2d4918 Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update from 0.29 works fine, I'm going to test subtitles and train XTTS voice. Thanks for your hard work. Please if you can add shortcuts to regenerate and play, it would help me a lot. Thank you

@lukaszliniewicz
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. If you're updating to 0.295, please download the new executable from the release and update Pandrator using it (if you haven't done so already). I will work on the shortcuts.

@JohnF51
Copy link

@JohnF51 JohnF51 commented on d2d4918 Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've already done it and it works excellently. My experience is that it works best when I start a new install from the launcher exe. I have tried both train voice and dubbing and version 0.295 is very good. So far everything works for me. Once again great job. I have a PC with RX3060 but I also have a PC with an older NVIDIA GeForce GTX 950 card is there any version that can work with this card? Thank you also for those keyboard shortcuts.

@lukaszliniewicz
Copy link
Owner Author

@lukaszliniewicz lukaszliniewicz commented on d2d4918 Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think the 950 has 2GB of VRAM, so it's definitely not enough for WhisperX (and so training and dubbing), but it may work with XTTS if you launch it with the lowvram option checked. You'd have to experiment with it. It might be quite slow, but perhaps faster than the CPU. The lowest I tried was a laptop 3050 with 4GB of VRAM. XTTS worked quite well. There may also be some problems with software (the CUDA version used by the XTTS server), though I think this should be ok.

Please sign in to comment.