Skip to content

Commit

Permalink
General stability improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
tmoroney committed Dec 10, 2024
1 parent fb968f8 commit c4dda48
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 33 deletions.
46 changes: 38 additions & 8 deletions AutoSubs-App/src-tauri/resources/AutoSubs V2.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ print("Operating System: " .. os_name)
-- Function to read a JSON file
local function read_json_file(file_path)
local file = assert(io.open(file_path, "r")) -- Open file for reading
local content = file:read("*a") -- Read the entire file content
local content = file:read("*a") -- Read the entire file content
file:close()

-- Parse the JSON content
Expand Down Expand Up @@ -50,10 +50,9 @@ if os_name == "Windows" then
-- Windows commands to open and close app using terminal commands
command_open = 'start "" "' .. mainApp .. '"'
command_close = 'powershell -Command "Get-Process AutoSubs | Stop-Process -Force"'

elseif os_name == "OSX" then
storagePath = os.getenv("HOME") ..
"/Library/Application Support/Blackmagic Design/DaVinci Resolve/Fusion/Scripts/Utility/AutoSubs/"
"/Library/Application Support/Blackmagic Design/DaVinci Resolve/Fusion/Scripts/Utility/AutoSubs/"

local file = assert(io.open(storagePath .. "install_path.txt", "r"))
local install_path = file:read("*l")
Expand All @@ -79,7 +78,7 @@ local mediaPool = project:GetMediaPool()

function CreateResponse(body)
local header = "HTTP/1.1 200 OK\r\n" .. "Server: ljsocket/0.1\r\n" .. "Content-Type: application/json\r\n" ..
"Content-Length: " .. #body .. "\r\n" .. "Connection: close\r\n" .. "\r\n"
"Content-Length: " .. #body .. "\r\n" .. "Connection: close\r\n" .. "\r\n"

local response = header .. body
return response
Expand Down Expand Up @@ -262,9 +261,23 @@ function ExportAudio(outputDir)
local success, err = pcall(function()
resolve:ImportRenderPreset(storagePath .. "render-audio-only.xml")
project:LoadRenderPreset('render-audio-only')
project:SetRenderSettings({ TargetDir = outputDir })
end)

if not success then
project:LoadRenderPreset('Audio Only')
project:SetRenderSettings({
TargetDir = outputDir
TargetDir = outputDir,
CustomName = "autosubs-exported-audio",
RenderMode = "Single clip",
IsExportVideo = false,
IsExportAudio = true,
AudioBitDepth = 24,
AudioSampleRate = 44100
})
end

pcall(function()
local pid = project:AddRenderJob()
project:StartRendering(pid)

Expand Down Expand Up @@ -385,7 +398,7 @@ function AddSubtitles(filePath, trackIndex, templateName, textFormat, removePunc
end
end

local timelineItem = mediaPool:AppendToTimeline({newClip})[1]
local timelineItem = mediaPool:AppendToTimeline({ newClip })[1]

local subtitle = subtitles[i]
local subtitleText = subtitle["text"]
Expand Down Expand Up @@ -439,7 +452,7 @@ function AddSubtitles(filePath, trackIndex, templateName, textFormat, removePunc
end)

if not success then
print("Error adding subtitle:", err)
print("Attempted to add subtitle on top of existing timeline item. Please select an empty track.")
end
end
end
Expand All @@ -465,7 +478,24 @@ assert(server:set_option("nodelay", true, "tcp"))
assert(server:set_option("reuseaddr", true))

-- Bind and listen
assert(server:bind(info))
local success, err = pcall(function()
assert(server:bind(info))
end)

if not success then
os.execute([[
curl --request POST \
--url http://localhost:55010/ \
--header 'Content-Type: application/json' \
--header 'content-type: application/json' \
--data '{
"func":"Exit"
}'
]])
sleep(0.5)
assert(server:bind(info))
end

assert(server:listen())

-- Start AutoSubs app
Expand Down
17 changes: 6 additions & 11 deletions AutoSubs-App/src/GlobalContext.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { useEffect, createContext, useState, useContext, useRef } from 'react';
import { fetch } from '@tauri-apps/plugin-http';
import { BaseDirectory, readTextFile, exists, writeTextFile, mkdir } from '@tauri-apps/plugin-fs';
import { join, documentDir } from '@tauri-apps/api/path';
import { BaseDirectory, readTextFile, exists, writeTextFile } from '@tauri-apps/plugin-fs';
import { join, documentDir, downloadDir } from '@tauri-apps/api/path';
import { save } from '@tauri-apps/plugin-dialog';
import { Subtitle, Speaker, TopSpeaker } from "@/types/interfaces";
import { load, Store } from '@tauri-apps/plugin-store';
Expand Down Expand Up @@ -105,7 +105,7 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {

const [markIn, setMarkIn] = useState(0);
const [model, setModel] = useState("small");
const [currentLanguage, setLanguage] = useState("en");
const [currentLanguage, setLanguage] = useState("");
const [currentTemplate, setTemplate] = useState("");
const [currentTrack, setTrack] = useState("");
const [translate, setTranslate] = useState(false);
Expand All @@ -119,11 +119,6 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {

async function setTranscriptsFolder() {
storageDir = await join(await documentDir(), "AutoSubs");
// create directory
const dirExists = await exists(storageDir, { baseDir: BaseDirectory.Document });
if (!dirExists) {
await mkdir(storageDir, { baseDir: BaseDirectory.Document, recursive: true });
}
}

async function getFullTranscriptPath() {
Expand All @@ -138,7 +133,7 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {

try {
setModel(await store.get<string>('model') || "small");
setLanguage(await store.get<string>('currentLanguage') || "en");
setLanguage(await store.get<string>('currentLanguage') || "");
setTemplate(await store.get<string>('currentTemplate') || "");
setTrack(await store.get<string>('currentTrack') || "");
setTranslate(await store.get<boolean>('translate') || false);
Expand Down Expand Up @@ -388,14 +383,14 @@ export function GlobalProvider({ children }: React.PropsWithChildren<{}>) {
},
body: JSON.stringify({
func: "ExportAudio",
outputDir: storageDir
outputDir: await downloadDir(),
}),
});

const data = await response.json();

if (data.timeline == "") {
throw new Error("You need to open a timeline in Resolve to start transcribing.");
throw new Error("Failed to export audio. You must have a timeline open in Resolve to start transcribing.");
}

setTimeline(data.timeline);
Expand Down
2 changes: 1 addition & 1 deletion AutoSubs-App/src/pages/diarize-page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ export function DiarizePage() {
<ToggleGroup
type="single"
value={textFormat}
onValueChange={(value) => value && setTextFormat(value)}
onValueChange={(value: string) => value && setTextFormat(value)}
className="grid grid-cols-3 gap-3 h-20"
>
<ToggleGroupItem
Expand Down
6 changes: 3 additions & 3 deletions AutoSubs-App/src/pages/home-page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ export function HomePage() {
<ToggleGroup
type="single"
value={textFormat}
onValueChange={(value) => value && setTextFormat(value)}
onValueChange={(value: string) => value && setTextFormat(value)}
className="grid grid-cols-3 gap-3 h-20"
>
<ToggleGroupItem
Expand Down Expand Up @@ -670,7 +670,7 @@ export function HomePage() {
<Switch checked={removePunctuation} onCheckedChange={(checked) => setRemovePunctuation(checked)} />
</div>
<div className="grid gap-3">
<Label htmlFor="sensitiveWords">Sensored Words</Label>
<Label htmlFor="sensitiveWords">Censored Words</Label>
<Input value={sensitiveWords} id="sensitiveWords" type="string" placeholder="bomb, gun, kill" onChange={(e) => setSensitiveWords(e.target.value)} />
</div>
<div className="grid grid-cols-2 gap-4">
Expand All @@ -690,7 +690,7 @@ export function HomePage() {
Force Align Words
</p>
<p className="text-xs text-muted-foreground">
Warning: May be unstable
Improve word level timing
</p>
</div>
<Switch checked={alignWords} onCheckedChange={(checked) => setAlignWords(checked)} />
Expand Down
34 changes: 24 additions & 10 deletions Transcription-Server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,17 +215,19 @@ def transcribe_audio(audio_file, kwargs, max_words, max_chars, sensitive_words):
kwargs["model"], device=kwargs["device"], compute_type=compute_type)
if kwargs["language"] == "auto":
result = model.transcribe_stable(
audio_file, task=kwargs["task"], verbose=True, vad_filter=True, progress_callback=log_progress)
audio_file, task=kwargs["task"], regroup=True, verbose=True, vad_filter=True, progress_callback=log_progress)
else:
result = model.transcribe_stable(
audio_file, language=kwargs["language"], task=kwargs["task"], verbose=True, vad_filter=True, progress_callback=log_progress)
audio_file, language=kwargs["language"], task=kwargs["task"], regroup=True, verbose=True, vad_filter=True, progress_callback=log_progress)
model.align(audio_file, result, kwargs["language"])
if kwargs["align_words"]:
model.align_words(audio_file, result, kwargs["language"])
result.pad()

else:
result = stable_whisper.transcribe_any(
inference, audio_file, inference_kwargs=kwargs, vad=False)
inference, audio_file, inference_kwargs=kwargs, vad=False, regroup=True)
result.pad()

result = modify_result(result, max_words, max_chars, sensitive_words)

Expand All @@ -235,10 +237,17 @@ def transcribe_audio(audio_file, kwargs, max_words, max_chars, sensitive_words):
def diarize_audio(audio_file, device):
from pyannote.audio import Pipeline
print("Starting diarization...")
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
pipeline.to(device)
return pipeline(audio_file)

try:
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
pipeline.to(device)
return pipeline(audio_file)
except Exception as e:
error_message = f"failed to load diarization model. {e}"
print(error_message)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=error_message
)

def merge_diarisation(transcript, diarization):
# Array of colors to choose from
Expand Down Expand Up @@ -484,8 +493,13 @@ async def transcribe(request: TranscriptionRequest):
json_filename = f"{timeline}.json"
json_filepath = os.path.join(request.output_dir, json_filename)
try:
# Create the directory if it doesn't exist
os.makedirs(request.output_dir, exist_ok=True)

# Save the transcription to a JSON file
with open(json_filepath, 'w', encoding='utf-8') as f:
json.dump(result, f, indent=4, ensure_ascii=False)

print(f"Transcription saved to: {json_filepath}")
except Exception as e:
print(f"Error saving JSON file: {e}")
Expand Down Expand Up @@ -550,10 +564,10 @@ def censor_word(result, seg_index, word_index):

(
result
.split_by_punctuation([('.', ' '), '。', '?', '?', ',', ','])
.split_by_gap(0.4)
.merge_by_gap(0.1, max_words=3)
.split_by_length(max_words=max_words, max_chars=max_chars)
# .split_by_punctuation([('.', ' '), '。', '?', '?', ',', ','])
# .split_by_gap(0.4)
# .merge_by_gap(0.1, max_words=3)
)

return result
Expand Down

0 comments on commit c4dda48

Please sign in to comment.