From 8a4f85b92be17689450a1bae4f10201f61cbca7a Mon Sep 17 00:00:00 2001 From: ivanshin Date: Mon, 10 Jul 2023 15:13:46 +0300 Subject: [PATCH 1/6] works on gpu --- components/noisereducer.py | 8 +++++--- components/transcriber.py | 7 ++++++- config.yaml | 6 +++--- main.py | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/components/noisereducer.py b/components/noisereducer.py index 437eec6..3c767ba 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -6,10 +6,13 @@ from typing import Text, Union from pathlib import Path from pydub import AudioSegment -from datetime import datetime, timezone +from datetime import datetime +import librosa +import soundfile import os import noisereduce as nr + SERVICE_NAME = 'NOISE_CLEANER' def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Path]) -> None: @@ -17,11 +20,10 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa file_name = path_to_audio_file.split(os.sep)[-1] sound = AudioSegment.from_file(path_to_audio_file).set_channels(1) - #sound.export("/output/path.wav", format="wav") rate = sound.frame_rate reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.1) ts = str(datetime.timestamp(datetime.now()) * 1000).split('.')[0] - wavfile.write(os.path.join(output_dir, ts + "_" + file_name), rate, reduced_noise) + wavfile.write(os.path.join(output_dir, ts + "_" + file_name), 16000, reduced_noise) return None def cleaner_worker(configs_dict, queue, logs_queue) -> None: diff --git a/components/transcriber.py b/components/transcriber.py index 8b7079d..f44e27f 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -4,8 +4,10 @@ from typing import Text, Union from pathlib import Path from huggingsound import SpeechRecognitionModel +import torch import json import os +import gc SERVICE_NAME = 'TRANSCRIBER' @@ -23,12 +25,15 @@ def transcribe_audio( def transcriber_worker(configs_dict, queue, logs_queue) -> None: """ Daemon cleaner worker """ - model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device) while True: if not queue.empty(): f_path = queue.get() logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME) transcribe_audio(f_path, configs_dict['output_dir'], model) logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME) + torch.cuda.empty_cache() + gc.collect() os.remove(f_path) pass \ No newline at end of file diff --git a/config.yaml b/config.yaml index 27932c5..fc637e0 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ #YAML -working_dir: D:\\WAV2VEC_ASR_WD\ -output_dir: D:\\WAV2VEC_ASR_OUPUT\ +working_dir: /media/ivan/Диск/WAV2VEC_WD +output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT logs_to_db: True # False -logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None \ No newline at end of file +logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True| else None diff --git a/main.py b/main.py index 675da63..ae0a676 100644 --- a/main.py +++ b/main.py @@ -20,8 +20,8 @@ watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['working_dir'], queue_to_cleaning)) watchdog_cleaner_proc.daemon= True watchdog_cleaner_proc.start() - #watchdog_proc.join() - # 2) cleaner + ##watchdog_proc.join() + ## 2) cleaner cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue)) cleaner.daemon= True cleaner.start() From 386cb7643f2d5062df45c9f2d982dde116195005 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Mon, 10 Jul 2023 15:15:13 +0300 Subject: [PATCH 2/6] parameters fine-tuning --- components/noisereducer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/noisereducer.py b/components/noisereducer.py index 3c767ba..7e2ff18 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -21,7 +21,7 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa file_name = path_to_audio_file.split(os.sep)[-1] sound = AudioSegment.from_file(path_to_audio_file).set_channels(1) rate = sound.frame_rate - reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.1) + reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.3) ts = str(datetime.timestamp(datetime.now()) * 1000).split('.')[0] wavfile.write(os.path.join(output_dir, ts + "_" + file_name), 16000, reduced_noise) return None From fc3e7930d8e0d0dd90e1bc084108858a5e9b0aa3 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 19 Jul 2023 14:13:58 +0300 Subject: [PATCH 3/6] whisper implementation --- components/transcriber.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/transcriber.py b/components/transcriber.py index f44e27f..a6d9d97 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -4,12 +4,14 @@ from typing import Text, Union from pathlib import Path from huggingsound import SpeechRecognitionModel +import whisper import torch import json import os import gc SERVICE_NAME = 'TRANSCRIBER' +BATCH_SIZE = 32 def transcribe_audio( path_to_audio_file: Union[Text,Path], @@ -18,7 +20,7 @@ def transcribe_audio( """ Transcribe single audio file """ file_name = path_to_audio_file.split(os.sep)[-1] - transcription = model.transcribe([path_to_audio_file]) + transcription = model.transcribe(path_to_audio_file, language='ru', fp16=True) with open(os.path.join(output_dir, file_name.split('.')[0] + '.json'), 'w', encoding='utf8') as out_file: json.dump(transcription, out_file, ensure_ascii= False) return None @@ -26,7 +28,11 @@ def transcribe_audio( def transcriber_worker(configs_dict, queue, logs_queue) -> None: """ Daemon cleaner worker """ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device) + #check or load model + model = whisper.load_model("large-v2", device) + #model = whisper.load_model("medium", device) + #model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device) + f_path = [] while True: if not queue.empty(): f_path = queue.get() From 299c32c7c205cb53e5fba70a4bb09c6c56aef4ca Mon Sep 17 00:00:00 2001 From: ivanshin Date: Sun, 20 Aug 2023 18:04:40 +0300 Subject: [PATCH 4/6] multiple transcribers support added --- components/config_structure.py | 25 ++++++++++++++++++++++++ components/configs_reader.py | 34 +++++++++++++-------------------- components/logs_writer.py | 4 ++-- components/noisereducer.py | 2 +- components/transcriber.py | 19 ++++++++++-------- config.yaml | 15 +++++++++++++++ main.py | 35 ++++++++++++++++++++++++++-------- 7 files changed, 94 insertions(+), 40 deletions(-) create mode 100644 components/config_structure.py diff --git a/components/config_structure.py b/components/config_structure.py new file mode 100644 index 0000000..ff943bc --- /dev/null +++ b/components/config_structure.py @@ -0,0 +1,25 @@ +from typing import Text, Union, Dict, Iterable +from pathlib import Path +from pydantic import BaseModel, model_validator + +# Base model for configuration file +class ConfigStructure(BaseModel): + working_dir: Union[Text,Path] + output_dir: Union[Text,Path] + clean_audio_dir: Union[Text,Path] = None + model: Union[Text,None] = 'large-v2' + devices: Union[Text, Iterable, None] = 'cpu' + logs_to_db: bool + logs_db_path: Union[Text,Path,None] + class Config: + extra = 'forbid' + validate_assigment = True + + @model_validator(mode= 'before') + @classmethod + def set_null_feilds(cls, field_values): + if field_values['devices'] is None: + field_values['devices'] = ['cpu'] + if field_values['model'] is None: + field_values['model'] = 'tiny' + return field_values \ No newline at end of file diff --git a/components/configs_reader.py b/components/configs_reader.py index 2493172..98948b3 100644 --- a/components/configs_reader.py +++ b/components/configs_reader.py @@ -4,45 +4,37 @@ """ from typing import Text, Union, Dict from pathlib import Path -from pydantic import BaseModel +from components.config_structure import ConfigStructure import os import yaml -# Base model for configuration file -class ConfigStructure(BaseModel): - working_dir: Union[Text,Path] - output_dir: Union[Text,Path] - logs_to_db: bool - logs_db_path: Union[Text,Path,None] - class Config: - extra = 'forbid' - -def create_dirs(configs_dict: Dict) -> None: +def create_dirs(configs_dict: ConfigStructure) -> None: """ Create necessary directories """ - clean_audio_path = os.path.join(configs_dict['working_dir'], 'CLEAN_AUDIO') + clean_audio_path = os.path.join(configs_dict.working_dir, 'CLEAN_AUDIO') if not os.path.exists(clean_audio_path): os.makedirs(clean_audio_path) - configs_dict['clean_audio_dir'] = clean_audio_path - if not os.path.exists(configs_dict['output_dir']): - os.makedirs(configs_dict['output_dir']) - if configs_dict['logs_to_db'] == True: - if not os.path.exists(configs_dict['logs_db_path']): - os.makedirs(configs_dict['logs_db_path']) + configs_dict.clean_audio_dir = clean_audio_path + if not os.path.exists(configs_dict.output_dir): + os.makedirs(configs_dict.output_dir) + if configs_dict.logs_to_db == True: + if not os.path.exists(configs_dict.logs_db_path): + os.makedirs(configs_dict.logs_db_path) return def validate(configs_dict: Dict) -> None: valid_conf_model = ConfigStructure(**configs_dict) - return + return valid_conf_model -def read_configs(config_file: Union[Text,Path] = "config.yaml") -> Dict: +def read_configs(config_file: Union[Text,Path] = "config.yaml") -> ConfigStructure: """ Read configs""" configs_dict = dict() with open(config_file, "r") as stream: try: configs_dict = yaml.safe_load(stream) - validate(configs_dict) + configs_dict = validate(configs_dict) + configs_dict.devices = list(configs_dict.devices) create_dirs(configs_dict) return configs_dict except yaml.YAMLError as exc: diff --git a/components/logs_writer.py b/components/logs_writer.py index 31f4e02..7791ae4 100644 --- a/components/logs_writer.py +++ b/components/logs_writer.py @@ -13,9 +13,9 @@ def configure_loger(APP_CONFIGS) -> logging.Logger: attributes_list = ['asctime', 'levelname', 'service_name', 'message'] formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') - if APP_CONFIGS['logs_to_db'] == True: + if APP_CONFIGS.logs_to_db == True: logger.propagate = False - database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') + database = os.path.join(APP_CONFIGS.logs_db_path, 'LOGS.db') table = 'asr_logs' sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) sql_handler.setLevel(logging.INFO) diff --git a/components/noisereducer.py b/components/noisereducer.py index 7e2ff18..87562f2 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -32,7 +32,7 @@ def cleaner_worker(configs_dict, queue, logs_queue) -> None: if not queue.empty(): f_path = queue.get() logs_queue.put(f'{f_path} Clean start' + '|' + SERVICE_NAME) - reduce_noise(f_path, configs_dict['clean_audio_dir']) + reduce_noise(f_path, configs_dict.clean_audio_dir) logs_queue.put(f'{f_path} Clean end'+'|' + SERVICE_NAME) os.remove(f_path) pass diff --git a/components/transcriber.py b/components/transcriber.py index a6d9d97..47f5c4e 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -4,6 +4,7 @@ from typing import Text, Union from pathlib import Path from huggingsound import SpeechRecognitionModel +import signal import whisper import torch import json @@ -25,20 +26,22 @@ def transcribe_audio( json.dump(transcription, out_file, ensure_ascii= False) return None -def transcriber_worker(configs_dict, queue, logs_queue) -> None: + +def transcriber_worker(configs_dict, queue, logs_queue, device) -> None: """ Daemon cleaner worker """ - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #check or load model - model = whisper.load_model("large-v2", device) - #model = whisper.load_model("medium", device) - #model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device) + try: + model = whisper.load_model(configs_dict.model, torch.device(device)) + except RuntimeError as e: + print(e) + os.kill(os.getppid(), signal.SIGTERM) # kill parent proc f_path = [] while True: if not queue.empty(): f_path = queue.get() - logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME) - transcribe_audio(f_path, configs_dict['output_dir'], model) - logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME) + logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME + f'_on_{device}_{os.getpid()}') + transcribe_audio(f_path, configs_dict.output_dir, model) + logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME + f'_on_{device}') torch.cuda.empty_cache() gc.collect() os.remove(f_path) diff --git a/config.yaml b/config.yaml index fc637e0..a5a22a0 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,20 @@ #YAML +# [directories settings] working_dir: /media/ivan/Диск/WAV2VEC_WD output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT + +# [transcriber settings] +model: #large-v2 # 'tiny' by default if blank (Whisper model settings) + +# [multiple GPU support] +devices: + - cpu + # set your cuda devices: #- cuda:0 + #- cuda:1 + # OR + #set blank if CPU (set same device multiple times to spawn more workers: #- cpu + #- cpu) + +# [logging settings] logs_to_db: True # False logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True| else None diff --git a/main.py b/main.py index ae0a676..c7187a1 100644 --- a/main.py +++ b/main.py @@ -17,22 +17,40 @@ # subprocesses # 1) watchdog with queue to cleaning - watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['working_dir'], queue_to_cleaning)) + watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.working_dir,\ + queue_to_cleaning)) watchdog_cleaner_proc.daemon= True watchdog_cleaner_proc.start() ##watchdog_proc.join() + + ## 2) cleaner - cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue)) + cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS,\ + queue_to_cleaning,\ + logs_queue)) cleaner.daemon= True cleaner.start() + + # 3) watchdog with queue to transcribation - watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['clean_audio_dir'], queue_to_transcribe)) + watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.clean_audio_dir,\ + queue_to_transcribe)) watchdog_transcribe_proc.daemon= True watchdog_transcribe_proc.start() - # 4) Russian wav2vec implementation - transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logs_queue)) - transcriber_proc.daemon= True - transcriber_proc.start() + + + # 4) Whisper transcriber implementation (multiple GPU support) + transcriber_proc = [] + for device in APP_CONFIGS.devices: + t_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS,\ + queue_to_transcribe,\ + logs_queue,\ + device)) + t_proc.daemon= True + transcriber_proc.append(t_proc) + t_proc.start() + + logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('Startup success') try: while True: @@ -43,7 +61,8 @@ watchdog_cleaner_proc.terminate() cleaner.terminate() watchdog_transcribe_proc.terminate() - transcriber_proc.terminate() + for proc in transcriber_proc: + proc.terminate() logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('All processes terminated') From 645eaa43ca183e4336725e3cb12cc1145c1f1be9 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Sun, 20 Aug 2023 18:06:13 +0300 Subject: [PATCH 5/6] upd readme --- README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 99f9796..408c639 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,25 @@ set your working directory for app and ouput directory: ```yaml #YAML EXAMPLE -working_dir: D:\\WAV2VEC_ASR_WD\ -output_dir: D:\\WAV2VEC_ASR_OUPUT\ +# [directories settings] +working_dir: /media/ivan/Диск/WAV2VEC_WD +output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT + +# [transcriber settings] +model: #large-v2 # 'tiny' by default if blank (Whisper model settings) + +# [multiple GPU support] +devices: + - cpu + # set your cuda devices: #- cuda:0 + #- cuda:1 + # OR + #set blank if CPU (set same device multiple times to spawn more workers: #- cpu + #- cpu) + +# [logging settings] logs_to_db: True # False -logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True else None +logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True| else None ``` 2) Create venv and activate: From f42c278b3799c75534fd878fcd67faead74cec7b Mon Sep 17 00:00:00 2001 From: ivanshin Date: Sun, 20 Aug 2023 18:07:26 +0300 Subject: [PATCH 6/6] upd req --- requirements.txt | Bin 2496 -> 2876 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7d2f4adf055889d8fb152ceab1a5141561c18297..0f6e0bb9ee954ef87d57ed256dc6d3e4a8508272 100644 GIT binary patch delta 378 zcmZ9IKT88a5XE04K@m~~Y;0^&h}>nfi7^p`RJQ4CtnR{H67uK5{n66aR_==kDQqnK z9QtMa-9}Jk*qPZkvv20@N9+CM^{#%NC{-j+3+2QVpI|Fxy1}|K^6|b*IjMg=CEC)p zQ9!?^N6m?u!&04T%$Yf+3o1FbrFyLnw#Vlx6v8%T#v1f5U*{j6|MO