From 6d079bd0dbca4b211dc176747bbb2d1ce0f263f4 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 5 Jul 2023 11:19:05 +0300 Subject: [PATCH 1/7] config validation and audio type check --- components/configs_reader.py | 12 ++++++++++-- components/watchdog_daemon.py | 3 ++- requirements.txt | Bin 0 -> 2492 bytes 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 requirements.txt diff --git a/components/configs_reader.py b/components/configs_reader.py index 4dc8631..e305407 100644 --- a/components/configs_reader.py +++ b/components/configs_reader.py @@ -4,9 +4,18 @@ """ from typing import Text, Union, Dict from pathlib import Path +from pydantic import BaseModel import os import yaml +# Base model for configuration file +class ConfigStructure(BaseModel): + working_dir: Union[Text,Path] + output_dir: Union[Text,Path] + class Config: + extra = 'forbid' + + def create_dirs(configs_dict: Dict) -> None: """ Create necessary directories """ clean_audio_path = os.path.join(configs_dict['working_dir'], 'CLEAN_AUDIO') @@ -18,8 +27,7 @@ def create_dirs(configs_dict: Dict) -> None: return def validate(configs_dict: Dict) -> None: - #TODO: using Pydantic validate config dictionary - # https://stackoverflow.com/questions/45812387/how-to-validate-structure-or-schema-of-dictionary-in-python + valid_conf_model = ConfigStructure(**configs_dict) return def read_configs(config_file: Union[Text,Path] = "config.yaml") -> Dict: diff --git a/components/watchdog_daemon.py b/components/watchdog_daemon.py index 57559fb..a458a10 100644 --- a/components/watchdog_daemon.py +++ b/components/watchdog_daemon.py @@ -1,5 +1,6 @@ from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler +import mimetypes as mime """ Create Handler for new audio files in working directory""" class AudioFilesHandler(FileSystemEventHandler): @@ -9,7 +10,7 @@ def __init__(self, queue) -> None: def on_created(self, event): #if event.is_directory == False: - if event: + if event and ('audio' in mime.guess_type(event.src_path)[0]): self.queue.put(event.src_path) print("added_to_queue", event.src_path) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5a04939b7e108356cfa4c0681340d54071803ef GIT binary patch literal 2492 zcmZveNpBNT5QN_uiJxK=+gaeifjdZCIYo|_WD?tBJT{5*Gw%^%hHAag?z8{T=f1s*%rdw_jd3u<8jPRr?+JuI8E6Ir!7Ze!Vf?2COsC+6KB%1 zs*#Qnr=uCla}dmCF3VmROvMb%#u3E~7DA?GojeYGB_`ohYrBvQHZURDKH{K@W5Je6uJuWloHk1oBR;ugrp6|Y zE4Y%4Oh6}VcZ|ECR%xZkZD_fbPa`jPgEgZYVY1TqUG&{N*wjj1E1t7{P$%8%(uQ`8N8NDamSkh8`8`Suv~~nO8QCLc5EM96R%I zXFbJ9vPGF+XAHdRysQ=RRUL3EpH_cUGsl9RRYtSB(6H z>&xyzBeG`u9HXN6qO~hTcWh6v>yFaMx};|O^C6aqas+>_J6F2#q* z+9;xTF@493Is2`*tnP>;rJ#)|s9D6dC6|SaWs_tgvsE zVgI|w&E+8V7g82{-+*;;xclkrPU@m5?+U3bzSGFeHSjx$d-7I(FXw9T4g^L Date: Wed, 5 Jul 2023 11:24:00 +0300 Subject: [PATCH 2/7] todo: logging --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 0db3fa9..ee2e79f 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,8 @@ from time import sleep import multiprocessing as mp - +#TODO: Add logging to simple DB, example: +#(https://stackoverflow.com/questions/2314307/python-logging-to-database) APP_CONFIGS = read_configs() # read configuration file if __name__ == '__main__': From f38a0389c30336a2d2183b3b06c19cd6388524ea Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 5 Jul 2023 12:57:56 +0300 Subject: [PATCH 3/7] logging to db baseline --- components/configs_reader.py | 10 ++++-- components/db_logs_handler.py | 60 +++++++++++++++++++++++++++++++++++ components/logs_writer.py | 37 +++++++++++++++++++++ config.yaml | 4 ++- critical.log | 0 error.log | 0 main.py | 9 ++++-- 7 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 components/db_logs_handler.py create mode 100644 components/logs_writer.py create mode 100644 critical.log create mode 100644 error.log diff --git a/components/configs_reader.py b/components/configs_reader.py index e305407..2493172 100644 --- a/components/configs_reader.py +++ b/components/configs_reader.py @@ -12,6 +12,8 @@ class ConfigStructure(BaseModel): working_dir: Union[Text,Path] output_dir: Union[Text,Path] + logs_to_db: bool + logs_db_path: Union[Text,Path,None] class Config: extra = 'forbid' @@ -19,11 +21,15 @@ class Config: def create_dirs(configs_dict: Dict) -> None: """ Create necessary directories """ clean_audio_path = os.path.join(configs_dict['working_dir'], 'CLEAN_AUDIO') - if not os.path.exists(clean_audio_path): + if not os.path.exists(clean_audio_path): os.makedirs(clean_audio_path) configs_dict['clean_audio_dir'] = clean_audio_path - if not os.path.exists(configs_dict['output_dir']): + if not os.path.exists(configs_dict['output_dir']): os.makedirs(configs_dict['output_dir']) + if configs_dict['logs_to_db'] == True: + if not os.path.exists(configs_dict['logs_db_path']): + os.makedirs(configs_dict['logs_db_path']) + return def validate(configs_dict: Dict) -> None: diff --git a/components/db_logs_handler.py b/components/db_logs_handler.py new file mode 100644 index 0000000..7df6c14 --- /dev/null +++ b/components/db_logs_handler.py @@ -0,0 +1,60 @@ +import logging +import sqlite3 + + +DEFAULT_SEPARATOR = '|' +DEFAULT_DATA_TYPE = 'TEXT' + + +#WARNING: attributes must be choosen from https://docs.python.org/3/library/logging.html#formatter-objects +DEFAULT_ATTRIBUTES_LIST = ['asctime', 'levelname', 'name', 'message'] + +class SQLiteHandler(logging.Handler): + def __init__(self, database, table, attributes_list): + ''' + SQLiteHandler class constructor + Parameters: + self: instance of the class + database: database + table: log table name + attributes_list: log table columns + Returns: + None + ''' + #super(SQLiteHandler, self).__init__() # for python 2.X + super().__init__() # for python 3.X + self.database = database + self.table = table + self.attributes = attributes_list + + # Create table if needed + create_table_sql = 'CREATE TABLE IF NOT EXISTS ' + self.table + ' (' + ((' ' + DEFAULT_DATA_TYPE + ', ').join(self.attributes)) + ' ' + DEFAULT_DATA_TYPE + ');' + #print(create_table_sql) + conn = sqlite3.connect(self.database) + conn.execute(create_table_sql) + conn.commit() + conn.close() + + def emit(self, record): + ''' + Save the log record + Parameters: + self: instance of the class + record: log record to be saved + Returns: + None + ''' + # Use default formatting if no formatter is set + self.format(record) + + #print(record.__dict__) + record_values = [record.__dict__[k] for k in self.attributes] + str_record_values = ', '.join("'{0}'".format(v.replace("'", '').replace('"', '').replace('\n', ' ')) for v in record_values) + #print(str_record_values) + + insert_sql = 'INSERT INTO ' + self.table + ' (' + (', '.join(self.attributes)) + ') VALUES (' + str_record_values + ');' + #print(insert_sql) + conn = sqlite3.connect(self.database) + conn.execute(insert_sql) + conn.commit() + conn.close() \ No newline at end of file diff --git a/components/logs_writer.py b/components/logs_writer.py new file mode 100644 index 0000000..27bd16a --- /dev/null +++ b/components/logs_writer.py @@ -0,0 +1,37 @@ +from components import db_logs_handler +import os +import logging + +def configure_loger(APP_CONFIGS): + if APP_CONFIGS['logs_to_db'] == False: + return logging.Logger('default_loger') + database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') + table = 'log' + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + + + attributes_list = ['asctime', 'levelname', 'message'] + formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter(formatter) + + sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) + sql_handler.setLevel(logging.INFO) + sql_handler.setFormatter(formatter) + + error_file_handler = logging.FileHandler('error.log') + error_file_handler.setLevel(logging.ERROR) + error_file_handler.setFormatter(formatter) + + critical_file_handler = logging.FileHandler('critical.log') + critical_file_handler.setLevel(logging.CRITICAL) + critical_file_handler.setFormatter(formatter) + + logger.addHandler(console_handler) + logger.addHandler(sql_handler) + logger.addHandler(error_file_handler) + logger.addHandler(critical_file_handler) + return logger \ No newline at end of file diff --git a/config.yaml b/config.yaml index 4a00f8c..27932c5 100644 --- a/config.yaml +++ b/config.yaml @@ -1,3 +1,5 @@ #YAML working_dir: D:\\WAV2VEC_ASR_WD\ -output_dir: D:\\WAV2VEC_ASR_OUPUT\ \ No newline at end of file +output_dir: D:\\WAV2VEC_ASR_OUPUT\ +logs_to_db: True # False +logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None \ No newline at end of file diff --git a/critical.log b/critical.log new file mode 100644 index 0000000..e69de29 diff --git a/error.log b/error.log new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py index ee2e79f..586026d 100644 --- a/main.py +++ b/main.py @@ -2,12 +2,15 @@ from components.watchdog_daemon import create_observer from components.noisereducer import cleaner_worker from components.transcriber import transcriber_worker -from time import sleep +from components.logs_writer import configure_loger import multiprocessing as mp +import os +import logging #TODO: Add logging to simple DB, example: #(https://stackoverflow.com/questions/2314307/python-logging-to-database) APP_CONFIGS = read_configs() # read configuration file +logger = configure_loger(APP_CONFIGS) if __name__ == '__main__': # variables @@ -31,7 +34,8 @@ # 4) Russian wav2vec implementation transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe)) transcriber_proc.daemon= True - transcriber_proc.start() + transcriber_proc.start() + logger.info('Startup success') try: while True: pass @@ -40,5 +44,6 @@ cleaner.terminate() watchdog_transcribe_proc.terminate() transcriber_proc.terminate() + logger.info('All processes terminated') From d7f5de63d1908d0c3a2f8a0aa8ae474429cade97 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 5 Jul 2023 13:29:51 +0300 Subject: [PATCH 4/7] logger bugfix --- components/logs_writer.py | 26 ++++++++++++++++---------- config.yaml | 2 +- main.py | 8 ++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/components/logs_writer.py b/components/logs_writer.py index 27bd16a..0e28e02 100644 --- a/components/logs_writer.py +++ b/components/logs_writer.py @@ -1,16 +1,25 @@ +""" Logger configurer depends on `logs_to_db` option + in configuretion file: + True -> write to set path db + False -> default console logging +""" from components import db_logs_handler import os import logging -def configure_loger(APP_CONFIGS): - if APP_CONFIGS['logs_to_db'] == False: - return logging.Logger('default_loger') - database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') - table = 'log' +def configure_loger(APP_CONFIGS) -> logging.Logger: + + if APP_CONFIGS['logs_to_db'] == True: + database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') + table = 'log' + sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) + sql_handler.setLevel(logging.INFO) + sql_handler.setFormatter(formatter) + logger.addHandler(sql_handler) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) - attributes_list = ['asctime', 'levelname', 'message'] formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') @@ -18,9 +27,7 @@ def configure_loger(APP_CONFIGS): console_handler.setLevel(logging.DEBUG) console_handler.setFormatter(formatter) - sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) - sql_handler.setLevel(logging.INFO) - sql_handler.setFormatter(formatter) + error_file_handler = logging.FileHandler('error.log') error_file_handler.setLevel(logging.ERROR) @@ -31,7 +38,6 @@ def configure_loger(APP_CONFIGS): critical_file_handler.setFormatter(formatter) logger.addHandler(console_handler) - logger.addHandler(sql_handler) logger.addHandler(error_file_handler) logger.addHandler(critical_file_handler) return logger \ No newline at end of file diff --git a/config.yaml b/config.yaml index 27932c5..e43e7fa 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ #YAML working_dir: D:\\WAV2VEC_ASR_WD\ output_dir: D:\\WAV2VEC_ASR_OUPUT\ -logs_to_db: True # False +logs_to_db: False # False logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None \ No newline at end of file diff --git a/main.py b/main.py index 586026d..a25d398 100644 --- a/main.py +++ b/main.py @@ -4,15 +4,11 @@ from components.transcriber import transcriber_worker from components.logs_writer import configure_loger import multiprocessing as mp -import os -import logging -#TODO: Add logging to simple DB, example: -#(https://stackoverflow.com/questions/2314307/python-logging-to-database) -APP_CONFIGS = read_configs() # read configuration file -logger = configure_loger(APP_CONFIGS) +APP_CONFIGS = read_configs() # read and validate configuration file if __name__ == '__main__': + logger = configure_loger(APP_CONFIGS) # variables queue_to_cleaning = mp.Queue() queue_to_transcribe = mp.Queue() From 1944d1610312085a3e557b6893e31f73674130e5 Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 5 Jul 2023 15:16:36 +0300 Subject: [PATCH 5/7] logging doesn't work (wrong implementation) --- components/logs_writer.py | 11 +++++------ components/noisereducer.py | 4 +++- components/transcriber.py | 4 +++- config.yaml | 2 +- main.py | 6 ++++-- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/components/logs_writer.py b/components/logs_writer.py index 0e28e02..149d94b 100644 --- a/components/logs_writer.py +++ b/components/logs_writer.py @@ -8,6 +8,11 @@ import logging def configure_loger(APP_CONFIGS) -> logging.Logger: + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + + attributes_list = ['asctime', 'levelname', 'message'] + formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') if APP_CONFIGS['logs_to_db'] == True: database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') @@ -17,12 +22,6 @@ def configure_loger(APP_CONFIGS) -> logging.Logger: sql_handler.setFormatter(formatter) logger.addHandler(sql_handler) - logger = logging.getLogger(__name__) - logger.setLevel(logging.INFO) - - attributes_list = ['asctime', 'levelname', 'message'] - formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') - console_handler = logging.StreamHandler() console_handler.setLevel(logging.DEBUG) console_handler.setFormatter(formatter) diff --git a/components/noisereducer.py b/components/noisereducer.py index 1d81477..7f8ee3e 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -22,11 +22,13 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa wavfile.write(os.path.join(output_dir, ts + "_" + file_name), rate, reduced_noise) return None -def cleaner_worker(configs_dict, queue) -> None: +def cleaner_worker(configs_dict, queue, logger) -> None: """ Daemon cleaner worker """ while True: if not queue.empty(): f_path = queue.get() + logger.info(f'{f_path} Clean start') reduce_noise(f_path, configs_dict['clean_audio_dir']) + logger.info(f'{f_path} Clean end') os.remove(f_path) pass diff --git a/components/transcriber.py b/components/transcriber.py index e338d89..bbdf4b9 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -19,12 +19,14 @@ def transcribe_audio( json.dump(transcription, out_file, ensure_ascii= False) return None -def transcriber_worker(configs_dict, queue) -> None: +def transcriber_worker(configs_dict, queue, logger) -> None: """ Daemon cleaner worker """ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian") while True: if not queue.empty(): f_path = queue.get() + logger.info(f'{f_path} Transcribe start') transcribe_audio(f_path, configs_dict['output_dir'], model) + logger.info(f'{f_path} Transcribe end') os.remove(f_path) pass \ No newline at end of file diff --git a/config.yaml b/config.yaml index e43e7fa..27932c5 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ #YAML working_dir: D:\\WAV2VEC_ASR_WD\ output_dir: D:\\WAV2VEC_ASR_OUPUT\ -logs_to_db: False # False +logs_to_db: True # False logs_db_path: D:\\WAV2VEC_ASR_DB\ # set path where to store SQLite DB if logs_to_db = True| else None \ No newline at end of file diff --git a/main.py b/main.py index a25d398..a1a50d1 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,8 @@ from components.logs_writer import configure_loger import multiprocessing as mp +#TODO: Create correct multiprocessing logging mechanism +# (https://stackoverflow.com/questions/641420/how-should-i-log-while-using-multiprocessing-in-python) APP_CONFIGS = read_configs() # read and validate configuration file if __name__ == '__main__': @@ -20,7 +22,7 @@ watchdog_cleaner_proc.start() #watchdog_proc.join() # 2) cleaner - cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning)) + cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logger)) cleaner.daemon= True cleaner.start() # 3) watchdog with queue to transcribation @@ -28,7 +30,7 @@ watchdog_transcribe_proc.daemon= True watchdog_transcribe_proc.start() # 4) Russian wav2vec implementation - transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe)) + transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logger)) transcriber_proc.daemon= True transcriber_proc.start() logger.info('Startup success') From 6af54cfd737ab9dfd08f5d19f4071d78ad23714c Mon Sep 17 00:00:00 2001 From: ivanshin Date: Wed, 5 Jul 2023 16:43:03 +0300 Subject: [PATCH 6/7] logging queue complete --- components/noisereducer.py | 6 +++--- components/transcriber.py | 6 +++--- main.py | 10 ++++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/components/noisereducer.py b/components/noisereducer.py index 7f8ee3e..62dc720 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -22,13 +22,13 @@ def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Pa wavfile.write(os.path.join(output_dir, ts + "_" + file_name), rate, reduced_noise) return None -def cleaner_worker(configs_dict, queue, logger) -> None: +def cleaner_worker(configs_dict, queue, logs_queue) -> None: """ Daemon cleaner worker """ while True: if not queue.empty(): f_path = queue.get() - logger.info(f'{f_path} Clean start') + logs_queue.put(f'{f_path} Clean start') reduce_noise(f_path, configs_dict['clean_audio_dir']) - logger.info(f'{f_path} Clean end') + logs_queue.put(f'{f_path} Clean end') os.remove(f_path) pass diff --git a/components/transcriber.py b/components/transcriber.py index bbdf4b9..1129f0b 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -19,14 +19,14 @@ def transcribe_audio( json.dump(transcription, out_file, ensure_ascii= False) return None -def transcriber_worker(configs_dict, queue, logger) -> None: +def transcriber_worker(configs_dict, queue, logs_queue) -> None: """ Daemon cleaner worker """ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-russian") while True: if not queue.empty(): f_path = queue.get() - logger.info(f'{f_path} Transcribe start') + logs_queue.put((f'{f_path} Transcribe start')) transcribe_audio(f_path, configs_dict['output_dir'], model) - logger.info(f'{f_path} Transcribe end') + logs_queue.put(f'{f_path} Transcribe end') os.remove(f_path) pass \ No newline at end of file diff --git a/main.py b/main.py index a1a50d1..ada39e7 100644 --- a/main.py +++ b/main.py @@ -12,8 +12,9 @@ if __name__ == '__main__': logger = configure_loger(APP_CONFIGS) # variables - queue_to_cleaning = mp.Queue() - queue_to_transcribe = mp.Queue() + logs_queue = mp.Queue(-1) + queue_to_cleaning = mp.Queue(-1) + queue_to_transcribe = mp.Queue(-1) # subprocesses # 1) watchdog with queue to cleaning @@ -22,7 +23,7 @@ watchdog_cleaner_proc.start() #watchdog_proc.join() # 2) cleaner - cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logger)) + cleaner = mp.Process(target= cleaner_worker, args= (APP_CONFIGS, queue_to_cleaning, logs_queue)) cleaner.daemon= True cleaner.start() # 3) watchdog with queue to transcribation @@ -30,12 +31,13 @@ watchdog_transcribe_proc.daemon= True watchdog_transcribe_proc.start() # 4) Russian wav2vec implementation - transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logger)) + transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logs_queue)) transcriber_proc.daemon= True transcriber_proc.start() logger.info('Startup success') try: while True: + logger.info(logs_queue.get()) pass except KeyboardInterrupt: watchdog_cleaner_proc.terminate() From b01a6abdf4cf9a5e45923abbd5534172d18681ad Mon Sep 17 00:00:00 2001 From: ivanshin Date: Fri, 7 Jul 2023 12:39:26 +0300 Subject: [PATCH 7/7] logging works fine --- components/db_logs_handler.py | 1 - components/logs_writer.py | 34 +++++++++++++++++----------------- components/noisereducer.py | 6 ++++-- components/transcriber.py | 6 ++++-- main.py | 10 +++++----- 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/components/db_logs_handler.py b/components/db_logs_handler.py index 7df6c14..e333fe9 100644 --- a/components/db_logs_handler.py +++ b/components/db_logs_handler.py @@ -29,7 +29,6 @@ def __init__(self, database, table, attributes_list): # Create table if needed create_table_sql = 'CREATE TABLE IF NOT EXISTS ' + self.table + ' (' + ((' ' + DEFAULT_DATA_TYPE + ', ').join(self.attributes)) + ' ' + DEFAULT_DATA_TYPE + ');' - #print(create_table_sql) conn = sqlite3.connect(self.database) conn.execute(create_table_sql) conn.commit() diff --git a/components/logs_writer.py b/components/logs_writer.py index 149d94b..31f4e02 100644 --- a/components/logs_writer.py +++ b/components/logs_writer.py @@ -10,33 +10,33 @@ def configure_loger(APP_CONFIGS) -> logging.Logger: logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) - - attributes_list = ['asctime', 'levelname', 'message'] + attributes_list = ['asctime', 'levelname', 'service_name', 'message'] formatter = logging.Formatter('%(' + ((')s' + db_logs_handler.DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s') if APP_CONFIGS['logs_to_db'] == True: + logger.propagate = False database = os.path.join(APP_CONFIGS['logs_db_path'], 'LOGS.db') - table = 'log' + table = 'asr_logs' sql_handler = db_logs_handler.SQLiteHandler(database = database, table = table, attributes_list = attributes_list) sql_handler.setLevel(logging.INFO) sql_handler.setFormatter(formatter) logger.addHandler(sql_handler) + else: + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter(formatter) - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.DEBUG) - console_handler.setFormatter(formatter) - - + - error_file_handler = logging.FileHandler('error.log') - error_file_handler.setLevel(logging.ERROR) - error_file_handler.setFormatter(formatter) + error_file_handler = logging.FileHandler('error.log') + error_file_handler.setLevel(logging.ERROR) + error_file_handler.setFormatter(formatter) - critical_file_handler = logging.FileHandler('critical.log') - critical_file_handler.setLevel(logging.CRITICAL) - critical_file_handler.setFormatter(formatter) + critical_file_handler = logging.FileHandler('critical.log') + critical_file_handler.setLevel(logging.CRITICAL) + critical_file_handler.setFormatter(formatter) - logger.addHandler(console_handler) - logger.addHandler(error_file_handler) - logger.addHandler(critical_file_handler) + logger.addHandler(console_handler) + logger.addHandler(error_file_handler) + logger.addHandler(critical_file_handler) return logger \ No newline at end of file diff --git a/components/noisereducer.py b/components/noisereducer.py index 62dc720..437eec6 100644 --- a/components/noisereducer.py +++ b/components/noisereducer.py @@ -10,6 +10,8 @@ import os import noisereduce as nr +SERVICE_NAME = 'NOISE_CLEANER' + def reduce_noise(path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Path]) -> None: """ Reduce noize from single audio file """ @@ -27,8 +29,8 @@ def cleaner_worker(configs_dict, queue, logs_queue) -> None: while True: if not queue.empty(): f_path = queue.get() - logs_queue.put(f'{f_path} Clean start') + logs_queue.put(f'{f_path} Clean start' + '|' + SERVICE_NAME) reduce_noise(f_path, configs_dict['clean_audio_dir']) - logs_queue.put(f'{f_path} Clean end') + logs_queue.put(f'{f_path} Clean end'+'|' + SERVICE_NAME) os.remove(f_path) pass diff --git a/components/transcriber.py b/components/transcriber.py index 1129f0b..8b7079d 100644 --- a/components/transcriber.py +++ b/components/transcriber.py @@ -7,6 +7,8 @@ import json import os +SERVICE_NAME = 'TRANSCRIBER' + def transcribe_audio( path_to_audio_file: Union[Text,Path], output_dir: Union[Text,Path], @@ -25,8 +27,8 @@ def transcriber_worker(configs_dict, queue, logs_queue) -> None: while True: if not queue.empty(): f_path = queue.get() - logs_queue.put((f'{f_path} Transcribe start')) + logs_queue.put(f'{f_path} Transcribe start' + '|' + SERVICE_NAME) transcribe_audio(f_path, configs_dict['output_dir'], model) - logs_queue.put(f'{f_path} Transcribe end') + logs_queue.put(f'{f_path} Transcribe end' + '|' + SERVICE_NAME) os.remove(f_path) pass \ No newline at end of file diff --git a/main.py b/main.py index ada39e7..675da63 100644 --- a/main.py +++ b/main.py @@ -4,9 +4,8 @@ from components.transcriber import transcriber_worker from components.logs_writer import configure_loger import multiprocessing as mp +import logging -#TODO: Create correct multiprocessing logging mechanism -# (https://stackoverflow.com/questions/641420/how-should-i-log-while-using-multiprocessing-in-python) APP_CONFIGS = read_configs() # read and validate configuration file if __name__ == '__main__': @@ -34,16 +33,17 @@ transcriber_proc = mp.Process(target= transcriber_worker, args= (APP_CONFIGS, queue_to_transcribe, logs_queue)) transcriber_proc.daemon= True transcriber_proc.start() - logger.info('Startup success') + logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('Startup success') try: while True: - logger.info(logs_queue.get()) + message= logs_queue.get().split('|') + logging.LoggerAdapter(logger, {'service_name': f'{message[1]}'}).info(message[0]) pass except KeyboardInterrupt: watchdog_cleaner_proc.terminate() cleaner.terminate() watchdog_transcribe_proc.terminate() transcriber_proc.terminate() - logger.info('All processes terminated') + logging.LoggerAdapter(logger, {'service_name': 'MAIN'}).info('All processes terminated')