From 8a4f85b92be17689450a1bae4f10201f61cbca7a Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Mon, 10 Jul 2023 15:13:46 +0300
Subject: [PATCH 1/6] works on gpu

---
 components/noisereducer.py | 8 +++++---
 components/transcriber.py  | 7 ++++++-
 config.yaml                | 6 +++---
 main.py                    | 4 ++--
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/components/noisereducer.py b/components/noisereducer.py
index 437eec6..3c767ba 100644
--- a/components/noisereducer.py
+++ b/components/noisereducer.py
@@ -6,10 +6,13 @@
 from typing import Text, Union
 from pathlib import Path
 from pydub import AudioSegment
-from datetime import datetime, timezone
+from datetime import datetime
+import librosa
+import soundfile
 import os
 import noisereduce as nr
 
+
 SERVICE_NAME = 'NOISE_CLEANER'
 
 def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Path]) -> None:
@@ -17,11 +20,10 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa
 
     file_name = path_to_audio_file.split(os.sep)[-1] 
     sound = AudioSegment.from_file(path_to_audio_file).set_channels(1)
-    #sound.export("/output/path.wav", format="wav")
     rate = sound.frame_rate
     reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.1)
     ts = str(datetime.timestamp(datetime.now()) * 1000).split('.')[0]
-    wavfile.write(os.path.join(output_dir, ts + "_" + file_name), rate, reduced_noise)
+    wavfile.write(os.path.join(output_dir, ts + "_" + file_name), 16000, reduced_noise)
     return None
 
 def cleaner_worker(configs_dict, queue, logs_queue) -> None:
diff --git a/components/transcriber.py b/components/transcriber.py
index 8b7079d..f44e27f 100644
--- a/components/transcriber.py
+++ b/components/transcriber.py
@@ -4,8 +4,10 @@
 from typing import Text, Union
 from pathlib import Path
 from huggingsound import SpeechRecognitionModel
+import torch
 import json
 import os
+import gc
 
 SERVICE_NAME = 'TRANSCRIBER'
 
@@ -23,12 +25,15 @@ def transcribe_audio(
 
 def transcriber_worker(configs_dict, queue, logs_queue) -> None:
     """ Daemon cleaner worker """
-    model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device)
     while True:
         if not queue.empty():
             f_path = queue.get()
             logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME)
             transcribe_audio(f_path, configs_dict['output_dir'], model)
             logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME)
+            torch.cuda.empty_cache()
+            gc.collect()
             os.remove(f_path)
         pass
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
index 27932c5..fc637e0 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,5 +1,5 @@
 #YAML
-working_dir: D:\\WAV2VEC_ASR_WD\
-output_dir: D:\\WAV2VEC_ASR_OUPUT\
+working_dir: /media/ivan/Диск/WAV2VEC_WD
+output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT
 logs_to_db: True # False
-logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None
\ No newline at end of file
+logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True| else None
diff --git a/main.py b/main.py
index 675da63..ae0a676 100644
--- a/main.py
+++ b/main.py
@@ -20,8 +20,8 @@
     watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['working_dir'], queue_to_cleaning))
     watchdog_cleaner_proc.daemon= True
     watchdog_cleaner_proc.start()
-    #watchdog_proc.join()
-    # 2) cleaner
+    ##watchdog_proc.join()
+    ## 2) cleaner
     cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue))
     cleaner.daemon= True
     cleaner.start()

From 386cb7643f2d5062df45c9f2d982dde116195005 Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Mon, 10 Jul 2023 15:15:13 +0300
Subject: [PATCH 2/6] parameters fine-tuning

---
 components/noisereducer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/noisereducer.py b/components/noisereducer.py
index 3c767ba..7e2ff18 100644
--- a/components/noisereducer.py
+++ b/components/noisereducer.py
@@ -21,7 +21,7 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa
     file_name = path_to_audio_file.split(os.sep)[-1] 
     sound = AudioSegment.from_file(path_to_audio_file).set_channels(1)
     rate = sound.frame_rate
-    reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.1)
+    reduced_noise = nr.reduce_noise(y=sound.get_array_of_samples(), sr=rate, prop_decrease= 0.3)
     ts = str(datetime.timestamp(datetime.now()) * 1000).split('.')[0]
     wavfile.write(os.path.join(output_dir, ts + "_" + file_name), 16000, reduced_noise)
     return None

From fc3e7930d8e0d0dd90e1bc084108858a5e9b0aa3 Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Wed, 19 Jul 2023 14:13:58 +0300
Subject: [PATCH 3/6] whisper implementation

---
 components/transcriber.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/components/transcriber.py b/components/transcriber.py
index f44e27f..a6d9d97 100644
--- a/components/transcriber.py
+++ b/components/transcriber.py
@@ -4,12 +4,14 @@
 from typing import Text, Union
 from pathlib import Path
 from huggingsound import SpeechRecognitionModel
+import whisper
 import torch
 import json
 import os
 import gc
 
 SERVICE_NAME = 'TRANSCRIBER'
+BATCH_SIZE = 32
 
 def transcribe_audio(
     path_to_audio_file: Union[Text,Path],
@@ -18,7 +20,7 @@ def transcribe_audio(
     """ Transcribe single audio file """
 
     file_name = path_to_audio_file.split(os.sep)[-1] 
-    transcription = model.transcribe([path_to_audio_file])
+    transcription = model.transcribe(path_to_audio_file, language='ru', fp16=True)
     with open(os.path.join(output_dir, file_name.split('.')[0] + '.json'), 'w', encoding='utf8') as out_file:
         json.dump(transcription, out_file, ensure_ascii= False)
     return None
@@ -26,7 +28,11 @@ def transcribe_audio(
 def transcriber_worker(configs_dict, queue, logs_queue) -> None:
     """ Daemon cleaner worker """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device)
+    #check or load model
+    model = whisper.load_model("large-v2", device)
+    #model = whisper.load_model("medium", device)
+    #model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device)
+    f_path = []
     while True:
         if not queue.empty():
             f_path = queue.get()

From 299c32c7c205cb53e5fba70a4bb09c6c56aef4ca Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Sun, 20 Aug 2023 18:04:40 +0300
Subject: [PATCH 4/6] multiple transcribers support added

---
 components/config_structure.py | 25 ++++++++++++++++++++++++
 components/configs_reader.py   | 34 +++++++++++++--------------------
 components/logs_writer.py      |  4 ++--
 components/noisereducer.py     |  2 +-
 components/transcriber.py      | 19 ++++++++++--------
 config.yaml                    | 15 +++++++++++++++
 main.py                        | 35 ++++++++++++++++++++++++++--------
 7 files changed, 94 insertions(+), 40 deletions(-)
 create mode 100644 components/config_structure.py

diff --git a/components/config_structure.py b/components/config_structure.py
new file mode 100644
index 0000000..ff943bc
--- /dev/null
+++ b/components/config_structure.py
@@ -0,0 +1,25 @@
+from typing import Text, Union, Dict, Iterable
+from pathlib import Path
+from pydantic import BaseModel, model_validator
+
+class ConfigStructure(BaseModel):
+    """Schema for config.yaml; keys that are not declared here are rejected (extra = 'forbid')."""
+    working_dir: Union[Text,Path]
+    output_dir: Union[Text,Path]
+    clean_audio_dir: Union[Text,Path] = None  # assigned by create_dirs() once the config is loaded
+    model: Union[Text,None] = 'large-v2'  # name handed to whisper.load_model by the transcriber
+    devices: Union[Text, Iterable, None] = 'cpu'  # main.py starts one transcriber process per entry
+    logs_to_db: bool
+    logs_db_path: Union[Text,Path,None]
+    class Config:
+        extra = 'forbid'
+        validate_assigment = True  # NOTE(review): pydantic spells this `validate_assignment`; presumably a no-op as written - confirm before renaming
+
+    @model_validator(mode= 'before')
+    @classmethod
+    def set_null_feilds(cls, field_values):
+        devices = field_values.get('devices') or 'cpu'  # omitted/blank -> CPU; .get() so a missing key cannot raise KeyError
+        field_values['devices'] = [devices] if isinstance(devices, str) else devices  # wrap a bare string: read_configs calls list() on it, which would split 'cpu' into characters
+        if field_values.get('model', '') is None:  # only an explicit blank becomes 'tiny'; an omitted key keeps the field default
+            field_values['model'] = 'tiny'
+        return field_values
\ No newline at end of file
diff --git a/components/configs_reader.py b/components/configs_reader.py
index 2493172..98948b3 100644
--- a/components/configs_reader.py
+++ b/components/configs_reader.py
@@ -4,45 +4,37 @@
 """
 from typing import Text, Union, Dict
 from pathlib import Path
-from pydantic import BaseModel
+from components.config_structure import ConfigStructure
 import os
 import yaml
 
-# Base model for configuration file
-class ConfigStructure(BaseModel):
-    working_dir: Union[Text,Path]
-    output_dir: Union[Text,Path]
-    logs_to_db: bool
-    logs_db_path: Union[Text,Path,None]
-    class Config:
-        extra = 'forbid'
 
-
-def create_dirs(configs_dict: Dict) -> None:
+def create_dirs(configs_dict: ConfigStructure) -> None:
     """ Create necessary directories """
-    clean_audio_path = os.path.join(configs_dict['working_dir'], 'CLEAN_AUDIO')
+    clean_audio_path = os.path.join(configs_dict.working_dir, 'CLEAN_AUDIO')
     if not os.path.exists(clean_audio_path):
         os.makedirs(clean_audio_path)
-    configs_dict['clean_audio_dir'] = clean_audio_path
-    if not os.path.exists(configs_dict['output_dir']):
-        os.makedirs(configs_dict['output_dir'])
-    if configs_dict['logs_to_db'] == True:
-        if not os.path.exists(configs_dict['logs_db_path']):
-            os.makedirs(configs_dict['logs_db_path'])
+    configs_dict.clean_audio_dir = clean_audio_path
+    if not os.path.exists(configs_dict.output_dir):
+        os.makedirs(configs_dict.output_dir)
+    if configs_dict.logs_to_db == True:
+        if not os.path.exists(configs_dict.logs_db_path):
+            os.makedirs(configs_dict.logs_db_path)
     
     return
 
 def validate(configs_dict: Dict) -> None:
     valid_conf_model = ConfigStructure(**configs_dict)
-    return
+    return valid_conf_model
 
-def read_configs(config_file: Union[Text,Path] = "config.yaml") -> Dict:
+def read_configs(config_file: Union[Text,Path] = "config.yaml") -> ConfigStructure:
     """ Read configs"""
     configs_dict = dict()
     with open(config_file, "r") as stream:
         try:
             configs_dict = yaml.safe_load(stream)
-            validate(configs_dict)
+            configs_dict = validate(configs_dict)
+            configs_dict.devices = list(configs_dict.devices)
             create_dirs(configs_dict)
             return configs_dict
         except yaml.YAMLError as exc:
diff --git a/components/logs_writer.py b/components/logs_writer.py
index 31f4e02..7791ae4 100644
--- a/components/logs_writer.py
+++ b/components/logs_writer.py
@@ -13,9 +13,9 @@ def configure_loger(APP_CONFIGS) -> logging.Logger:
     attributes_list = ['asctime', 'levelname', 'service_name', 'message']
     formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s')
 
-    if APP_CONFIGS['logs_to_db'] == True:
+    if APP_CONFIGS.logs_to_db == True:
         logger.propagate = False
-        database =  os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db')
+        database =  os.path.join(APP_CONFIGS.logs_db_path, 'LOGS.db')
         table = 'asr_logs'
         sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list)
         sql_handler.setLevel(logging.INFO)
diff --git a/components/noisereducer.py b/components/noisereducer.py
index 7e2ff18..87562f2 100644
--- a/components/noisereducer.py
+++ b/components/noisereducer.py
@@ -32,7 +32,7 @@ def cleaner_worker(configs_dict, queue, logs_queue) -> None:
         if not queue.empty():
             f_path = queue.get()
             logs_queue.put(f'{f_path} Clean start' + '|' + SERVICE_NAME)
-            reduce_noise(f_path, configs_dict['clean_audio_dir'])
+            reduce_noise(f_path, configs_dict.clean_audio_dir)
             logs_queue.put(f'{f_path} Clean end'+'|' + SERVICE_NAME)
             os.remove(f_path)
         pass
diff --git a/components/transcriber.py b/components/transcriber.py
index a6d9d97..47f5c4e 100644
--- a/components/transcriber.py
+++ b/components/transcriber.py
@@ -4,6 +4,7 @@
 from typing import Text, Union
 from pathlib import Path
 from huggingsound import SpeechRecognitionModel
+import signal
 import whisper
 import torch
 import json
@@ -25,20 +26,22 @@ def transcribe_audio(
         json.dump(transcription, out_file, ensure_ascii= False)
     return None
 
-def transcriber_worker(configs_dict, queue, logs_queue) -> None:
+
+def transcriber_worker(configs_dict, queue, logs_queue, device) -> None:
     """ Daemon cleaner worker """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     #check or load model
-    model = whisper.load_model("large-v2", device)
-    #model = whisper.load_model("medium", device)
-    #model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian", device)
+    try:
+        model = whisper.load_model(configs_dict.model, torch.device(device))
+    except RuntimeError as e:
+        print(e)
+        os.kill(os.getppid(), signal.SIGTERM) # kill parent proc
     f_path = []
     while True:
         if not queue.empty():
             f_path = queue.get()
-            logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME)
-            transcribe_audio(f_path, configs_dict['output_dir'], model)
-            logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME)
+            logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME + f'_on_{device}_{os.getpid()}')
+            transcribe_audio(f_path, configs_dict.output_dir, model)
+            logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME + f'_on_{device}')
             torch.cuda.empty_cache()
             gc.collect()
             os.remove(f_path)
diff --git a/config.yaml b/config.yaml
index fc637e0..a5a22a0 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,5 +1,20 @@
 #YAML
+# [directories settings]
 working_dir: /media/ivan/Диск/WAV2VEC_WD
 output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT
+
+# [transcriber settings]
+model: #large-v2 # Whisper model name; 'tiny' is used when this is left blank
+
+# [multiple GPU support]
+devices:
+  - cpu
+  # One transcriber worker is started per entry. To use CUDA devices, list them instead:
+  #   - cuda:0
+  #   - cuda:1
+  # Leave `devices` blank to run a single CPU worker; repeat an entry to start more workers on that device:
+  #   - cpu
+
+# [logging settings]
 logs_to_db: True # False
 logs_db_path: /media/ivan/Диск/WAV2VEC_DB # set path where to store SQLite DB if logs_to_db = True| else None
diff --git a/main.py b/main.py
index ae0a676..c7187a1 100644
--- a/main.py
+++ b/main.py
@@ -17,22 +17,40 @@
     
     # subprocesses
     # 1) watchdog with queue to cleaning
-    watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['working_dir'], queue_to_cleaning))
+    watchdog_cleaner_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.working_dir,\
+                                                                       queue_to_cleaning))
     watchdog_cleaner_proc.daemon= True
     watchdog_cleaner_proc.start()
     ##watchdog_proc.join()
+
+
     ## 2) cleaner
-    cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue))
+    cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS,\
+                                                        queue_to_cleaning,\
+                                                        logs_queue))
     cleaner.daemon= True
     cleaner.start()
+
+
     # 3) watchdog with queue to transcribation
-    watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS['clean_audio_dir'], queue_to_transcribe))
+    watchdog_transcribe_proc = mp.Process(target= create_observer, args= (APP_CONFIGS.clean_audio_dir,\
+                                                                          queue_to_transcribe))
     watchdog_transcribe_proc.daemon= True
     watchdog_transcribe_proc.start()
-    # 4) Russian wav2vec implementation
-    transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logs_queue))
-    transcriber_proc.daemon= True
-    transcriber_proc.start()
+
+
+    # 4) Whisper transcriber implementation (multiple GPU support)
+    transcriber_proc = []
+    for device in APP_CONFIGS.devices:
+        t_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS,\
+                                                                             queue_to_transcribe,\
+                                                                             logs_queue,\
+                                                                             device))
+        t_proc.daemon= True
+        transcriber_proc.append(t_proc)
+        t_proc.start()
+
+        
     logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('Startup success')
     try:
         while True:
@@ -43,7 +61,8 @@
         watchdog_cleaner_proc.terminate()
         cleaner.terminate()
         watchdog_transcribe_proc.terminate()
-        transcriber_proc.terminate()
+        for proc in transcriber_proc:
+            proc.terminate()
         logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('All processes terminated')
 
 

From 645eaa43ca183e4336725e3cb12cc1145c1f1be9 Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Sun, 20 Aug 2023 18:06:13 +0300
Subject: [PATCH 5/6] upd readme

---
 README.md | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 99f9796..408c639 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,25 @@ set your working directory for app and ouput directory:
 
 ```yaml
 #YAML EXAMPLE
-working_dir: D:\\WAV2VEC_ASR_WD\
-output_dir: D:\\WAV2VEC_ASR_OUPUT\
+# [directories settings]
+working_dir: /media/ivan/Диск/WAV2VEC_WD
+output_dir: /media/ivan/Диск/WAV2VEC_OUTPUT
+
+# [transcriber settings]
+model: #large-v2 # Whisper model name; 'tiny' is used when this is left blank
+
+# [multiple GPU support]
+devices:
+  - cpu
+  # One transcriber worker is started per entry. To use CUDA devices, list them instead:
+  #   - cuda:0
+  #   - cuda:1
+  # Leave `devices` blank to run a single CPU worker; repeat an entry to start more workers on that device:
+  #   - cpu
+
+# [logging settings]
 logs_to_db: True # False
-logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True else None
+logs_db_path: /media/ivan/Диск/WAV2VEC_DB # directory for the SQLite log database (used when logs_to_db is True; may be left blank otherwise)
 ```
 
 2) Create venv and activate:

From f42c278b3799c75534fd878fcd67faead74cec7b Mon Sep 17 00:00:00 2001
From: ivanshin <i.shingel@outlook.com>
Date: Sun, 20 Aug 2023 18:07:26 +0300
Subject: [PATCH 6/6] upd req

---
 requirements.txt | Bin 2496 -> 2876 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7d2f4adf055889d8fb152ceab1a5141561c18297..0f6e0bb9ee954ef87d57ed256dc6d3e4a8508272 100644
GIT binary patch
delta 378
zcmZ9IKT88a5XE04K@m~~Y;0^&h}>nfi7^p`RJQ4CtnR{H67uK5{n66aR_==kDQqnK
z9QtMa-9}Jk*qPZkvv20@N9+CM^{#%NC{-j+3+2QVpI|Fxy1}|K^6|b*IjMg=CEC)p
zQ9!?^N6m?u!&04T%$Yf+3o1FbrFyLnw#Vlx6v8%T#v1f5U*{j6|MO<d#GwYVvbTyg
zVt5QeA2=lMss|Q=T@eRVtXn(QCAo3=(0RkHPnX@b8*c}bhz$a?<ZL`e&4bgQaZ5le
z_10;xe!!@yW3)tc7pVIEC67NVsO05Kqg8$$KA$eMboC|Od!CA+SDoGcQBeB^!-+-a

delta 30
mcmdlZc0hPT7|Y}~mMfc8*fuduw&HBt{D?D(k%gCmiva+#mkFW(