From a25c3844409ca567dc22abdc4b03416fce54f448 Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Sun, 25 Aug 2024 20:29:49 +0300 Subject: [PATCH] Adding option to set n_threads for Whisper.cpp (#892) --- CONTRIBUTING.md | 20 ++++++-------------- Makefile | 23 ++++++++++++++++++++--- buzz/transcriber/whisper_cpp.py | 2 ++ docs/docs/preferences.md | 32 +++++++++++++++++++++++++++++++- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a0daa101e..a9a20bfd7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,7 +94,8 @@ Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManage ``` 2. Install the GNU make. `choco install make` 3. Install the ffmpeg. `choco install ffmpeg` -4. Install Poetry, paste this info Windows PowerShell line by line. [More info](https://python-poetry.org/docs/) +4. Install [MSYS2](https://www.msys2.org/), follow [this guide](https://sajidifti.medium.com/how-to-install-gcc-and-gdb-on-windows-using-msys2-tutorial-0fceb7e66454). +5. Install Poetry, paste this info Windows PowerShell line by line. [More info](https://python-poetry.org/docs/) ``` (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | py - @@ -102,23 +103,14 @@ Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManage Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser ``` -5. Restart Windows. +6. Restart Windows. -6. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git` -7. Enter repo folder `cd buzz` -8. Copy `whisper.dll` from the repo backup to `buzz` folder. -``` -cp -r .\dll_backup\ .\buzz\ -``` +7. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git` +8. Enter repo folder `cd buzz` 9. Activate the virtual environment `poetry shell` 10. Install the dependencies `poetry install` 11. Build Buzz `poetry build` -12. 
Install Buzz -``` -$whlFile = Get-ChildItem .\dist\buzz*.whl | Select-Object -First 1 -pip install $whlFile -``` -13. Run Buzz `python -m buzz` +12. Run Buzz `python -m buzz` #### GPU Support diff --git a/Makefile b/Makefile index 6c49409e0..7ebc758b4 100644 --- a/Makefile +++ b/Makefile @@ -26,10 +26,17 @@ else endif clean: +ifeq ($(OS), Windows_NT) + del /f buzz\$(LIBWHISPER) 2> nul + del /f buzz\whisper_cpp.py 2> nul + rmdir /s /q whisper.cpp\build 2> nul + rmdir /s /q dist 2> nul +else rm -f buzz/$(LIBWHISPER) rm -f buzz/whisper_cpp.py rm -rf whisper.cpp/build || true rm -rf dist/* || true +endif COVERAGE_THRESHOLD := 75 @@ -68,9 +75,9 @@ else endif buzz/$(LIBWHISPER): -ifeq ($(OS),Windows_NT) - cp dll_backup/whisper.dll buzz || true - cp dll_backup/SDL2.dll buzz || true +ifeq ($(OS), Windows_NT) + cp dll_backup/whisper.dll buzz || copy dll_backup\whisper.dll buzz\whisper.dll + cp dll_backup/SDL2.dll buzz || copy dll_backup\SDL2.dll buzz\SDL2.dll else cmake -S whisper.cpp -B whisper.cpp/build/ $(CMAKE_FLAGS) cmake --build whisper.cpp/build --verbose @@ -98,6 +105,7 @@ dmg_mac: --app-drop-link 425 120 \ --codesign "$$BUZZ_CODESIGN_IDENTITY" \ --notarize "$$BUZZ_KEYCHAIN_NOTARY_PROFILE" \ + --filesystem APFS \ "${mac_dmg_path}" \ "dist/dmg/" @@ -188,10 +196,19 @@ translation_po: sed -i.bak 's/CHARSET/UTF-8/' ${TMP_POT_FILE_PATH} && rm ${TMP_POT_FILE_PATH}.bak msgmerge -U ${PO_FILE_PATH} ${TMP_POT_FILE_PATH} +# On windows we can have two ways to compile locales, one for CI the other for local builds +# Will try both and ignore errors if they fail translation_mo: +ifeq ($(OS), Windows_NT) + -forfiles /p buzz\locale /c "cmd /c python ..\..\msgfmt.py -o @path\LC_MESSAGES\buzz.mo @path\LC_MESSAGES\buzz.po" + -for dir in buzz/locale/*/ ; do \ + python msgfmt.py -o $$dir/LC_MESSAGES/buzz.mo $$dir/LC_MESSAGES/buzz.po; \ + done +else for dir in buzz/locale/*/ ; do \ python msgfmt.py -o $$dir/LC_MESSAGES/buzz.mo $$dir/LC_MESSAGES/buzz.po; \ done +endif lint: ruff check . 
--fix diff --git a/buzz/transcriber/whisper_cpp.py b/buzz/transcriber/whisper_cpp.py index bea1d8d60..1d6a2af08 100644 --- a/buzz/transcriber/whisper_cpp.py +++ b/buzz/transcriber/whisper_cpp.py @@ -1,3 +1,4 @@ +import os import ctypes import logging from typing import Union, Any, List @@ -109,6 +110,7 @@ def whisper_cpp_params( params = whisper_cpp.whisper_full_default_params( whisper_cpp.WHISPER_SAMPLING_GREEDY ) + params.n_threads = int(os.getenv("BUZZ_WHISPERCPP_N_THREADS", 4)) params.print_realtime = print_realtime params.print_progress = print_progress diff --git a/docs/docs/preferences.md b/docs/docs/preferences.md index 1b249875b..f4ca56286 100644 --- a/docs/docs/preferences.md +++ b/docs/docs/preferences.md @@ -34,4 +34,34 @@ Available variables: Live transcription export can be used to integrate Buzz with other applications like OBS Studio. When enabled, live text transcripts will be exported to a text file as they get generated and translated. -If AI translation is enabled for live recordings, the translated text will also be exported to the text file. Filename for the translated text will end with `.translated.txt`. \ No newline at end of file +If AI translation is enabled for live recordings, the translated text will also be exported to the text file. Filename for the translated text will end with `.translated.txt`. + +## Advanced Preferences + +To keep preferences section simple for new users, some more advanced preferences are settable via OS environment variables. Set the necessary environment variables in your OS before starting Buzz or create a script to set them. 
+ +On macOS and Linux create `run_buzz.sh` with the following content: + +```bash +#!/bin/bash +export VARIABLE=value +export SOME_OTHER_VARIABLE=some_other_value +buzz +``` + +On Windows create `run_buzz.bat` with the following content: + +```bat +@echo off +set VARIABLE=value +set SOME_OTHER_VARIABLE=some_other_value +"C:\Program Files (x86)\Buzz\Buzz.exe" +``` + +### Available variables + +**BUZZ_WHISPERCPP_N_THREADS** - Number of threads to use for Whisper.cpp model. Default is `4`. Available from `v1.0.2`. + +On a laptop with 16 threads, setting `BUZZ_WHISPERCPP_N_THREADS=8` leads to some 15% speedup in transcription time. +Increasing the number of threads even more will lead to slower transcription time, as results from parallel threads have to be +combined to produce the final answer. \ No newline at end of file