diff --git a/AutoSubs-App/src-tauri/resources/AutoSubs V2.lua b/AutoSubs-App/src-tauri/resources/AutoSubs V2.lua
index 48f2aa4..e1b78ee 100644
--- a/AutoSubs-App/src-tauri/resources/AutoSubs V2.lua
+++ b/AutoSubs-App/src-tauri/resources/AutoSubs V2.lua
@@ -10,7 +10,7 @@ print("Operating System: " .. os_name)
 -- Function to read a JSON file
 local function read_json_file(file_path)
     local file = assert(io.open(file_path, "r")) -- Open file for reading
-    local content = file:read("*a") -- Read the entire file content
+    local content = file:read("*a")              -- Read the entire file content
     file:close()
 
     -- Parse the JSON content
@@ -50,10 +50,9 @@ if os_name == "Windows" then
     -- Windows commands to open and close app using terminal commands
     command_open = 'start "" "' .. mainApp .. '"'
     command_close = 'powershell -Command "Get-Process AutoSubs | Stop-Process -Force"'
-
 elseif os_name == "OSX" then
     storagePath = os.getenv("HOME") ..
-        "/Library/Application Support/Blackmagic Design/DaVinci Resolve/Fusion/Scripts/Utility/AutoSubs/"
+                      "/Library/Application Support/Blackmagic Design/DaVinci Resolve/Fusion/Scripts/Utility/AutoSubs/"
 
     local file = assert(io.open(storagePath .. "install_path.txt", "r"))
     local install_path = file:read("*l")
@@ -79,7 +78,7 @@ local mediaPool = project:GetMediaPool()
 function CreateResponse(body)
     local header = "HTTP/1.1 200 OK\r\n" .. "Server: ljsocket/0.1\r\n" .. "Content-Type: application/json\r\n" ..
-        "Content-Length: " .. #body .. "\r\n" .. "Connection: close\r\n" .. "\r\n"
+                       "Content-Length: " .. #body .. "\r\n" .. "Connection: close\r\n" .. "\r\n"
 
     local response = header .. body
     return response
@@ -262,9 +261,23 @@ function ExportAudio(outputDir)
     local success, err = pcall(function()
         resolve:ImportRenderPreset(storagePath .. "render-audio-only.xml")
         project:LoadRenderPreset('render-audio-only')
+        project:SetRenderSettings({ TargetDir = outputDir })
+    end)
+
+    if not success then
+        project:LoadRenderPreset('Audio Only')
         project:SetRenderSettings({
-            TargetDir = outputDir
+            TargetDir = outputDir,
+            CustomName = "autosubs-exported-audio",
+            RenderMode = "Single clip",
+            IsExportVideo = false,
+            IsExportAudio = true,
+            AudioBitDepth = 24,
+            AudioSampleRate = 44100
         })
+    end
+
+    pcall(function()
         local pid = project:AddRenderJob()
         project:StartRendering(pid)
@@ -385,7 +398,7 @@ function AddSubtitles(filePath, trackIndex, templateName, textFormat, removePunc
             end
         end
 
-        local timelineItem = mediaPool:AppendToTimeline({newClip})[1]
+        local timelineItem = mediaPool:AppendToTimeline({ newClip })[1]
 
         local subtitle = subtitles[i]
         local subtitleText = subtitle["text"]
@@ -439,7 +452,7 @@ function AddSubtitles(filePath, trackIndex, templateName, textFormat, removePunc
         end)
 
         if not success then
-            print("Error adding subtitle:", err)
+            print("Attempted to add subtitle on top of existing timeline item. Please select an empty track.")
         end
     end
 end
@@ -465,7 +478,24 @@ assert(server:set_option("nodelay", true, "tcp"))
 assert(server:set_option("reuseaddr", true))
 
 -- Bind and listen
-assert(server:bind(info))
+local success, err = pcall(function()
+    assert(server:bind(info))
+end)
+
+if not success then
+    os.execute([[
+    curl --request POST \
+      --url http://localhost:55010/ \
+      --header 'Content-Type: application/json' \
+      --header 'content-type: application/json' \
+      --data '{
+        "func":"Exit"
+      }'
+    ]])
+    sleep(0.5)
+    assert(server:bind(info))
+end
+
 assert(server:listen())
 
 -- Start AutoSubs app
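Note on the bind fallback above: it recovers from a stale AutoSubs instance that is still holding the port by POSTing `{"func":"Exit"}` to the old server on localhost:55010, waiting half a second, and binding again. For reference, a minimal sketch of the same recovery handshake (the port number and the Exit function come straight from this patch; the socket handling is illustrative only):

```python
import socket
import time

import requests  # any HTTP client works; requests is assumed here for brevity

PORT = 55010  # port of the AutoSubs link server, per the curl call above

def bind_with_recovery(port: int = PORT) -> socket.socket:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        sock.bind(("127.0.0.1", port))
    except OSError:
        # A previous instance still owns the port: ask it to shut itself down,
        # give it a moment to release the socket, then retry the bind once.
        requests.post(f"http://localhost:{port}/", json={"func": "Exit"}, timeout=2)
        time.sleep(0.5)
        sock.bind(("127.0.0.1", port))
    return sock
```

Retrying only once keeps the failure mode simple: if the second bind also fails, the assert still surfaces the error instead of looping forever.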
diff --git a/AutoSubs-App/src/GlobalContext.tsx b/AutoSubs-App/src/GlobalContext.tsx
index 7c48517..fd49d84 100644
--- a/AutoSubs-App/src/GlobalContext.tsx
+++ b/AutoSubs-App/src/GlobalContext.tsx
@@ -1,7 +1,7 @@
 import { useEffect, createContext, useState, useContext, useRef } from 'react';
 import { fetch } from '@tauri-apps/plugin-http';
-import { BaseDirectory, readTextFile, exists, writeTextFile, mkdir } from '@tauri-apps/plugin-fs';
-import { join, documentDir } from '@tauri-apps/api/path';
+import { BaseDirectory, readTextFile, exists, writeTextFile } from '@tauri-apps/plugin-fs';
+import { join, documentDir, downloadDir } from '@tauri-apps/api/path';
 import { save } from '@tauri-apps/plugin-dialog';
 import { Subtitle, Speaker, TopSpeaker } from "@/types/interfaces";
 import { load, Store } from '@tauri-apps/plugin-store';
@@ -105,7 +105,7 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {
     const [markIn, setMarkIn] = useState(0);
     const [model, setModel] = useState("small");
-    const [currentLanguage, setLanguage] = useState("en");
+    const [currentLanguage, setLanguage] = useState("");
     const [currentTemplate, setTemplate] = useState("");
     const [currentTrack, setTrack] = useState("");
     const [translate, setTranslate] = useState(false);
@@ -119,11 +119,6 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {
 
     async function setTranscriptsFolder() {
         storageDir = await join(await documentDir(), "AutoSubs");
-        // create directory
-        const dirExists = await exists(storageDir, { baseDir: BaseDirectory.Document });
-        if (!dirExists) {
-            await mkdir(storageDir, { baseDir: BaseDirectory.Document, recursive: true });
-        }
     }
 
     async function getFullTranscriptPath() {
@@ -138,7 +133,7 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {
         try {
             setModel(await store.get('model') || "small");
-            setLanguage(await store.get('currentLanguage') || "en");
+            setLanguage(await store.get('currentLanguage') || "");
             setTemplate(await store.get('currentTemplate') || "");
             setTrack(await store.get('currentTrack') || "");
             setTranslate(await store.get('translate') || false);
@@ -388,14 +383,14 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {
                 },
                 body: JSON.stringify({
                     func: "ExportAudio",
-                    outputDir: storageDir
+                    outputDir: await downloadDir(),
                 }),
             });
 
             const data = await response.json();
 
             if (data.timeline == "") {
-                throw new Error("You need to open a timeline in Resolve to start transcribing.");
+                throw new Error("Failed to export audio. You must have a timeline open in Resolve to start transcribing.");
            }
 
             setTimeline(data.timeline);
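Two behavioural notes on the GlobalContext changes: exported audio now lands in the user's Downloads folder (`downloadDir()`) rather than the Documents/AutoSubs storage folder, and the front end no longer pre-creates the output directory, since the Python server now does that with `os.makedirs(..., exist_ok=True)` further down. For clarity, a rough sketch of the ExportAudio round trip as the patch implies it, assuming the link server listens on port 55010 (per the curl call in the Lua section); the function name and the `timeline` response field are taken from the diff:

```python
import json
from urllib.request import Request, urlopen

def export_audio(output_dir: str) -> dict:
    """Mirror of the fetch that GlobalContext.tsx performs against the Lua link server."""
    payload = json.dumps({"func": "ExportAudio", "outputDir": output_dir}).encode()
    req = Request("http://localhost:55010/", data=payload,
                  headers={"Content-Type": "application/json"})
    with urlopen(req, timeout=120) as resp:
        data = json.loads(resp.read())
    if data.get("timeline") == "":
        # Same condition the front end turns into a user-facing error.
        raise RuntimeError("Failed to export audio. You must have a timeline "
                           "open in Resolve to start transcribing.")
    return data
```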
diff --git a/AutoSubs-App/src/pages/diarize-page.tsx b/AutoSubs-App/src/pages/diarize-page.tsx
index 3f0d812..634d4e5 100644
--- a/AutoSubs-App/src/pages/diarize-page.tsx
+++ b/AutoSubs-App/src/pages/diarize-page.tsx
@@ -331,7 +331,7 @@ export function DiarizePage() {
-                            onValueChange={(value) => value && setTextFormat(value)}
+                            onValueChange={(value: string) => value && setTextFormat(value)}
                             className="grid grid-cols-3 gap-3 h-20"
                         >
diff --git a/AutoSubs-App/src/pages/home-page.tsx b/AutoSubs-App/src/pages/home-page.tsx
--- a/AutoSubs-App/src/pages/home-page.tsx
+++ b/AutoSubs-App/src/pages/home-page.tsx
@@ ... @@ export function HomePage() {
-                            onValueChange={(value) => value && setTextFormat(value)}
+                            onValueChange={(value: string) => value && setTextFormat(value)}
                             className="grid grid-cols-3 gap-3 h-20"
                         >
@@ ... @@ export function HomePage() {
                             onCheckedChange={(checked) => setRemovePunctuation(checked)} />
-
+                            onChange={(e) => setSensitiveWords(e.target.value)} />
@@ -690,7 +690,7 @@ export function HomePage() {
                                 Force Align Words
-                                Warning: May be unstable
+                                Improve word level timing
                             onCheckedChange={(checked) => setAlignWords(checked)} />
diff --git a/Transcription-Server/server.py b/Transcription-Server/server.py
index c423640..2098563 100644
--- a/Transcription-Server/server.py
+++ b/Transcription-Server/server.py
@@ -215,17 +215,19 @@ def transcribe_audio(audio_file, kwargs, max_words, max_chars, sensitive_words):
             kwargs["model"], device=kwargs["device"], compute_type=compute_type)
         if kwargs["language"] == "auto":
             result = model.transcribe_stable(
-                audio_file, task=kwargs["task"], verbose=True, vad_filter=True, progress_callback=log_progress)
+                audio_file, task=kwargs["task"], regroup=True, verbose=True, vad_filter=True, progress_callback=log_progress)
         else:
             result = model.transcribe_stable(
-                audio_file, language=kwargs["language"], task=kwargs["task"], verbose=True, vad_filter=True, progress_callback=log_progress)
+                audio_file, language=kwargs["language"], task=kwargs["task"], regroup=True, verbose=True, vad_filter=True, progress_callback=log_progress)
         model.align(audio_file, result, kwargs["language"])
         if kwargs["align_words"]:
             model.align_words(audio_file, result, kwargs["language"])
+            result.pad()
     else:
         result = stable_whisper.transcribe_any(
-            inference, audio_file, inference_kwargs=kwargs, vad=False)
+            inference, audio_file, inference_kwargs=kwargs, vad=False, regroup=True)
+        result.pad()
 
     result = modify_result(result, max_words, max_chars, sensitive_words)
 
@@ -235,10 +237,17 @@ def transcribe_audio(audio_file, kwargs, max_words, max_chars, sensitive_words):
 def diarize_audio(audio_file, device):
     from pyannote.audio import Pipeline
     print("Starting diarization...")
-    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
-    pipeline.to(device)
-    return pipeline(audio_file)
-
+    try:
+        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
+        pipeline.to(device)
+        return pipeline(audio_file)
+    except Exception as e:
+        error_message = f"failed to load diarization model. {e}"
+        print(error_message)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=error_message
+        )
 
 def merge_diarisation(transcript, diarization):
     # Array of colors to choose from
@@ -484,8 +493,13 @@ async def transcribe(request: TranscriptionRequest):
     json_filename = f"{timeline}.json"
     json_filepath = os.path.join(request.output_dir, json_filename)
     try:
+        # Create the directory if it doesn't exist
+        os.makedirs(request.output_dir, exist_ok=True)
+
+        # Save the transcription to a JSON file
         with open(json_filepath, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=4, ensure_ascii=False)
+        print(f"Transcription saved to: {json_filepath}")
     except Exception as e:
         print(f"Error saving JSON file: {e}")
@@ -550,10 +564,10 @@ def censor_word(result, seg_index, word_index):
     (
         result
-        .split_by_punctuation([('.', ' '), '。', '?', '？', ',', '，'])
-        .split_by_gap(0.4)
-        .merge_by_gap(0.1, max_words=3)
         .split_by_length(max_words=max_words, max_chars=max_chars)
+        # .split_by_punctuation([('.', ' '), '。', '?', '？', ',', '，'])
+        # .split_by_gap(0.4)
+        # .merge_by_gap(0.1, max_words=3)
     )
     return result
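A note on the diarization guard: `Pipeline.from_pretrained` typically fails here because pyannote/speaker-diarization-3.1 is a gated Hugging Face model, so a missing or unaccepted access token used to crash the server; the new try/except surfaces it as an HTTP 500 carrying the underlying message instead. For illustration, a sketch of the loading path with the token made explicit; the token plumbing is hypothetical and not part of this patch:

```python
import torch
from pyannote.audio import Pipeline

def load_diarization_pipeline(hf_token: str | None = None) -> Pipeline:
    # Gated model: downloading it requires accepting the model card's terms
    # and passing a valid Hugging Face token at least once.
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=hf_token,
    )
    # Run on GPU when one is available, mirroring the device argument
    # that server.py passes into diarize_audio.
    pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    return pipeline
```

Catching the failure at load time also keeps the regrouping change independent: `modify_result` now splits purely by caption length (`split_by_length`), with the old punctuation and gap-based regrouping kept as commented-out reference.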