diff --git a/README.md b/README.md index 99f9796..408c639 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,25 @@ set your working directory for app and ouput directory: ```yaml #YAML EXAMPLE -working_dir: D:\\WAV2VEC_ASR_WD\ -output_dir: D:\\WAV2VEC_ASR_OUPUT\ +# [directories settings] +working_dir: /media/ivan/Диск/WAV2VEC_WD +output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT + +# [transcriber settings] +model: #large-v2 # 'tiny' by default if blank (Whisper model settings) + +# [multiple GPU support] +devices: + - cpu + # set your cuda devices: #- cuda:0 + #- cuda:1 + # OR + # leave blank to use the CPU (list the same device multiple times to spawn more workers: #- cpu + #- cpu) + +# [logging settings] logs_to_db: True # False -logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True else None +logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True, else None ``` 2) Create venv and activate: diff --git a/components/config_structure.py b/components/config_structure.py new file mode 100644 index 0000000..ff943bc --- /dev/null +++ b/components/config_structure.py @@ -0,0 +1,25 @@ +from typing import Text, Union, Dict, Iterable +from pathlib import Path +from pydantic import BaseModel, model_validator + +# Base model for configuration file +class ConfigStructure(BaseModel): + working_dir: Union[Text,Path] + output_dir: Union[Text,Path] + clean_audio_dir: Union[Text,Path] = None + model: Union[Text,None] = 'large-v2' + devices: Union[Text, Iterable, None] = 'cpu' + logs_to_db: bool + logs_db_path: Union[Text,Path,None] + class Config: + extra = 'forbid' + validate_assigment = True + + @model_validator(mode= 'before') + @classmethod + def set_null_feilds(cls, field_values): + if field_values['devices'] is None: + field_values['devices'] = ['cpu'] + if field_values['model'] is None: + field_values['model'] = 'tiny' + return field_values \ No newline at end of file diff --git a/components/configs_reader.py 
b/components/configs_reader.py index 2493172..98948b3 100644 --- a/components/configs_reader.py +++ b/components/configs_reader.py @@ -4,45 +4,37 @@ """ from typing import Text, Union, Dict from pathlib import Path -from pydantic import BaseModel +from components.config_structure import ConfigStructure import os import yaml -# Base model for configuration file -class ConfigStructure(BaseModel): - working_dir: Union[Text,Path] - output_dir: Union[Text,Path] - logs_to_db: bool - logs_db_path: Union[Text,Path,None] - class Config: - extra = 'forbid' - -def create_dirs(configs_dict: Dict) -> None: +def create_dirs(configs_dict: ConfigStructure) -> None: """ Create necessary directories """ - clean_audio_path = os.path.join(configs_dict['working_dir'], 'CLEAN_AUDIO') + clean_audio_path = os.path.join(configs_dict.working_dir, 'CLEAN_AUDIO') if not os.path.exists(clean_audio_path): os.makedirs(clean_audio_path) - configs_dict['clean_audio_dir'] = clean_audio_path - if not os.path.exists(configs_dict['output_dir']): - os.makedirs(configs_dict['output_dir']) - if configs_dict['logs_to_db'] == True: - if not os.path.exists(configs_dict['logs_db_path']): - os.makedirs(configs_dict['logs_db_path']) + configs_dict.clean_audio_dir = clean_audio_path + if not os.path.exists(configs_dict.output_dir): + os.makedirs(configs_dict.output_dir) + if configs_dict.logs_to_db == True: + if not os.path.exists(configs_dict.logs_db_path): + os.makedirs(configs_dict.logs_db_path) return def validate(configs_dict: Dict) -> None: valid_conf_model = ConfigStructure(**configs_dict) - return + return valid_conf_model -def read_configs(config_file: Union[Text,Path] = "config.yaml") -> Dict: +def read_configs(config_file: Union[Text,Path] = "config.yaml") -> ConfigStructure: """ Read configs""" configs_dict = dict() with open(config_file, "r") as stream: try: configs_dict = yaml.safe_load(stream) - validate(configs_dict) + configs_dict = validate(configs_dict) + configs_dict.devices = 
list(configs_dict.devices) create_dirs(configs_dict) return configs_dict except yaml.YAMLError as exc: diff --git a/components/logs_writer.py b/components/logs_writer.py index 31f4e02..7791ae4 100644 --- a/components/logs_writer.py +++ b/components/logs_writer.py @@ -13,9 +13,9 @@ def configure_loger(APP_CONFIGS) -> logging.Logger: attributes_list = ['asctime', 'levelname', 'service_name', 'message'] formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') - if APP_CONFIGS['logs_to_db'] == True: + if APP_CONFIGS.logs_to_db == True: logger.propagate = False - database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') + database = os.path.join(APP_CONFIGS.logs_db_path, 'LOGS.db') table = 'asr_logs' sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) sql_handler.setLevel(logging.INFO) diff --git a/components/noisereducer.py b/components/noisereducer.py index 437eec6..87562f2 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -6,10 +6,13 @@ from typing import Text, Union from pathlib import Path from pydub import AudioSegment -from datetime import datetime, timezone +from datetime import datetime +import librosa +import soundfile import os import noisereduce as nr + SERVICE_NAME = 'NOISE_CLEANER' def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Path]) -> None: @@ -17,11 +20,10 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa file_name = path_to_audio_file.split(os.sep)[-1] sound = AudioSegment.from_file(path_to_audio_file).set_channels(1) - #sound.export("/output/path.wav", format="wav") rate = sound.frame_rate - reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.1) + reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.3) ts = str(datetime.timestamp(datetime.now()) * 
1000).split('.')[0] - wavfile.write(os.path.join(output_dir, ts + "_" + file_name), rate, reduced_noise) + wavfile.write(os.path.join(output_dir, ts + "_" + file_name), 16000, reduced_noise) return None def cleaner_worker(configs_dict, queue, logs_queue) -> None: @@ -30,7 +32,7 @@ def cleaner_worker(configs_dict, queue, logs_queue) -> None: if not queue.empty(): f_path = queue.get() logs_queue.put(f'{f_path} Clean start' + '|' + SERVICE_NAME) - reduce_noise(f_path, configs_dict['clean_audio_dir']) + reduce_noise(f_path, configs_dict.clean_audio_dir) logs_queue.put(f'{f_path} Clean end'+'|' + SERVICE_NAME) os.remove(f_path) pass diff --git a/components/transcriber.py b/components/transcriber.py index 8b7079d..47f5c4e 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -4,10 +4,15 @@ from typing import Text, Union from pathlib import Path from huggingsound import SpeechRecognitionModel +import signal +import whisper +import torch import json import os +import gc SERVICE_NAME = 'TRANSCRIBER' +BATCH_SIZE = 32 def transcribe_audio( path_to_audio_file: Union[Text,Path], @@ -16,19 +21,28 @@ def transcribe_audio( """ Transcribe single audio file """ file_name = path_to_audio_file.split(os.sep)[-1] - transcription = model.transcribe([path_to_audio_file]) + transcription = model.transcribe(path_to_audio_file, language='ru', fp16=True) with open(os.path.join(output_dir, file_name.split('.')[0] + '.json'), 'w', encoding='utf8') as out_file: json.dump(transcription, out_file, ensure_ascii= False) return None -def transcriber_worker(configs_dict, queue, logs_queue) -> None: + +def transcriber_worker(configs_dict, queue, logs_queue, device) -> None: """ Daemon cleaner worker """ - model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian") + #check or load model + try: + model = whisper.load_model(configs_dict.model, torch.device(device)) + except RuntimeError as e: + print(e) + os.kill(os.getppid(), signal.SIGTERM) # kill parent proc + 
f_path = [] while True: if not queue.empty(): f_path = queue.get() - logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME) - transcribe_audio(f_path, configs_dict['output_dir'], model) - logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME) + logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME + f'_on_{device}_{os.getpid()}') + transcribe_audio(f_path, configs_dict.output_dir, model) + logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME + f'_on_{device}') + torch.cuda.empty_cache() + gc.collect() os.remove(f_path) pass \ No newline at end of file diff --git a/config.yaml b/config.yaml index 27932c5..a5a22a0 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,20 @@ #YAML -working_dir: D:\\WAV2VEC_ASR_WD\ -output_dir: D:\\WAV2VEC_ASR_OUPUT\ +# [directories settings] +working_dir: /media/ivan/Диск/WAV2VEC_WD +output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT + +# [transcriber settings] +model: #large-v2 # 'tiny' by default if blank (Whisper model settings) + +# [multiple GPU support] +devices: + - cpu + # set your cuda devices: #- cuda:0 + #- cuda:1 + # OR + # leave blank to use the CPU (list the same device multiple times to spawn more workers: #- cpu + #- cpu) + +# [logging settings] logs_to_db: True # False -logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None \ No newline at end of file +logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True, else None diff --git a/main.py b/main.py index 675da63..c7187a1 100644 --- a/main.py +++ b/main.py @@ -17,22 +17,40 @@ # subprocesses # 1) watchdog with queue to cleaning - watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['working_dir'], queue_to_cleaning)) + watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.working_dir,\ + queue_to_cleaning)) watchdog_cleaner_proc.daemon= True watchdog_cleaner_proc.start() - #watchdog_proc.join() - # 2) cleaner - 
cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue)) + ##watchdog_proc.join() + + + ## 2) cleaner + cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS,\ + queue_to_cleaning,\ + logs_queue)) cleaner.daemon= True cleaner.start() + + # 3) watchdog with queue to transcribation - watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['clean_audio_dir'], queue_to_transcribe)) + watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.clean_audio_dir,\ + queue_to_transcribe)) watchdog_transcribe_proc.daemon= True watchdog_transcribe_proc.start() - # 4) Russian wav2vec implementation - transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logs_queue)) - transcriber_proc.daemon= True - transcriber_proc.start() + + + # 4) Whisper transcriber implementation (multiple GPU support) + transcriber_proc = [] + for device in APP_CONFIGS.devices: + t_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS,\ + queue_to_transcribe,\ + logs_queue,\ + device)) + t_proc.daemon= True + transcriber_proc.append(t_proc) + t_proc.start() + + logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('Startup success') try: while True: @@ -43,7 +61,8 @@ watchdog_cleaner_proc.terminate() cleaner.terminate() watchdog_transcribe_proc.terminate() - transcriber_proc.terminate() + for proc in transcriber_proc: + proc.terminate() logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('All processes terminated') diff --git a/requirements.txt b/requirements.txt index 7d2f4ad..0f6e0bb 100644 Binary files a/requirements.txt and b/requirements.txt differ