diff --git a/AeroViz/rawDataReader/core/__init__.py b/AeroViz/rawDataReader/core/__init__.py index 84d6693..c2a7baf 100644 --- a/AeroViz/rawDataReader/core/__init__.py +++ b/AeroViz/rawDataReader/core/__init__.py @@ -1,9 +1,9 @@ import json -import logging from abc import ABC, abstractmethod +from contextlib import contextmanager from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Optional, Generator import numpy as np import pandas as pd @@ -12,6 +12,7 @@ from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn from AeroViz.rawDataReader.config.supported_instruments import meta +from AeroViz.rawDataReader.core.logger import ReaderLogger from AeroViz.rawDataReader.core.qc import DataQualityControl __all__ = ['AbstractReader'] @@ -40,7 +41,7 @@ def __init__(self, self.path = Path(path) self.meta = meta[self.nam] - self.logger = self._setup_logger() + self.logger = ReaderLogger(self.nam, self.path) self.reset = reset self.qc = qc @@ -81,20 +82,6 @@ def _raw_reader(self, file): def _QC(self, df: DataFrame) -> DataFrame: return df - def _setup_logger(self) -> logging.Logger: - logger = logging.getLogger(self.nam) - logger.setLevel(logging.INFO) - - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - handler = logging.FileHandler(self.path / f'{self.nam}.log') - handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')) - logger.addHandler(handler) - - return logger - def _rate_calculate(self, raw_data, qc_data) -> None: def __base_rate(raw_data, qc_data): period_size = len(raw_data.resample('1h').mean().index) @@ -107,31 +94,27 @@ def __base_rate(raw_data, qc_data): # validate rate calculation if period_size == 0 or sample_size == 0 or qc_size == 0: - self.logger.warning(f'\t\t No data for this period... skipping') - print(f'\t\t\033[91m No data for this period... 
skipping\033[0m') + self.logger.warning(f'\t\t No data for this period... skip') continue - - if period_size < sample_size or sample_size < qc_size: - self.logger.warning( - f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping') - print( - f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping') + if period_size < sample_size: + self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip') + continue + if sample_size < qc_size: + self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip') continue else: - _acq_rate = round((sample_size / period_size) * 100, 1) - _yid_rate = round((qc_size / sample_size) * 100, 1) - _OEE_rate = round((qc_size / period_size) * 100, 1) - - self.logger.info(f'{_nam}:') - self.logger.info(f"\tAcquisition rate: {_acq_rate}%") - self.logger.info(f'\tYield rate: {_yid_rate}%') - self.logger.info(f'\tOEE rate: {_OEE_rate}%') - self.logger.info(f"{'=' * 60}") - - print(f'\n\t{_nam} : ') - print(f'\t\tacquisition rate | yield rate -> OEE rate : ' - f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m') + _sample_rate = round((sample_size / period_size) * 100, 1) + _valid_rate = round((qc_size / sample_size) * 100, 1) + _total_rate = round((qc_size / period_size) * 100, 1) + + self.logger.info(f"\t\t{self.logger.CYAN}▶ {_nam}{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}") if self.meta['deter_key'] is not None: # use qc_freq to calculate each period rate @@ -141,9 +124,8 @@ def __base_rate(raw_data, qc_data): for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, 
@contextmanager
def progress_reading(self, files: list) -> Generator:
    """Show a rich progress bar while buffering this reader's log output.

    While the context is active, ``self.logger.info``, ``self.logger.warning``
    and ``self.logger.error`` are replaced by recorders that only store the
    message. When the context exits (normally or through an exception) the
    original logger methods are restored and every stored message is
    forwarded to them in the order it was originally emitted.

    Args:
        files: Files about to be read; only ``len(files)`` is used, as the
            total of the progress task.

    Yields:
        A ``(progress, task)`` tuple: the active ``Progress`` instance and the
        id of the task created for this reader.
    """
    levels = ('info', 'warning', 'error')
    original = {level: getattr(self.logger, level) for level in levels}

    # One shared, ordered buffer so the replay keeps the chronological order
    # across levels instead of grouping messages level by level.
    buffered = []

    def _recorder(level):
        # Bind ``level`` per recorder (avoids the late-binding closure pitfall).
        def _record(msg):
            buffered.append((level, msg))

        return _record

    for level in levels:
        setattr(self.logger, level, _recorder(level))

    try:
        with Progress(
                TextColumn("[bold blue]{task.description}", style="bold blue"),
                BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
                TaskProgressColumn(),
                TimeRemainingColumn(),
                TextColumn("{task.fields[filename]}", style="yellow"),
                console=Console(force_terminal=True, color_system="auto", width=120),
                expand=False
        ) as progress:
            task = progress.add_task(f"▶ Reading {self.nam} files", total=len(files), filename="")
            yield progress, task
    finally:
        # Restore every logger method first, so a failure while replaying
        # cannot leave the logger half-patched.
        for level, method in original.items():
            setattr(self.logger, level, method)

        # Replay the buffered messages in emission order.
        for level, msg in buffered:
            original[level](msg)
Please check the current path.") df_list = [] - with Progress( - TextColumn("[bold blue]{task.description}", style="bold blue"), - BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"), - TaskProgressColumn(), - TimeRemainingColumn(), - TextColumn("{task.fields[filename]}", style="yellow"), - console=Console(force_terminal=True, color_system="auto"), - expand=False - ) as progress: - task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="") + + # Context manager for progress bar display + with self.progress_reading(files) as (progress, task): for file in files: progress.update(task, advance=1, filename=file.name) try: - df = self._raw_reader(file) - - if df is not None and not df.empty: + if (df := self._raw_reader(file)) is not None and not df.empty: df_list.append(df) else: - self.logger.warning(f"File {file.name} produced an empty DataFrame or None.") - - except pd.errors.ParserError as e: - self.logger.error(f"Error tokenizing data: {e}") + self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.") except Exception as e: self.logger.error(f"Error reading {file.name}: {e}") if not df_list: - raise ValueError("All files were either empty or failed to read.") + raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m") raw_data = concat(df_list, axis=0).groupby(level=0).first() @@ -260,29 +258,28 @@ def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: def _run(self, user_start, user_end): # read pickle if pickle file exists and 'reset=False' or process raw data or append new data if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset: - print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m " - f"from {user_start} to {user_end}\n") + self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE") _f_raw_done, _f_qc_done = 
read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam) if self.append: - print(f"Appending new data from {user_start} to {user_end}") + self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data") + _f_raw_new, _f_qc_new = self._read_raw_files() _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new) _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new) + else: _f_raw, _f_qc = _f_raw_done, _f_qc_done + return _f_qc if self.qc else _f_raw else: - print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m " - f"from {user_start} to {user_end}\n") + self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA") _f_raw, _f_qc = self._read_raw_files() # process time index - data_start, data_end = _f_raw.index.sort_values()[[0, -1]] - _f_raw = self._timeIndex_process(_f_raw, user_start, user_end) _f_qc = self._timeIndex_process(_f_qc, user_start, user_end) _f_qc = self._outlier_process(_f_qc) @@ -290,15 +287,8 @@ def _run(self, user_start, user_end): # save self._save_data(_f_raw, _f_qc) - self.logger.info(f"{'=' * 60}") - self.logger.info(f"Raw data time : {data_start} to {data_end}") - self.logger.info(f"Output time : {user_start} to {user_end}") - self.logger.info(f"{'-' * 60}") - if self.rate: - _f_raw = _f_raw.apply(to_numeric, errors='coerce') - _f_qc = _f_qc.apply(to_numeric, errors='coerce') - self._rate_calculate(_f_raw, _f_qc) + self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce')) return _f_qc if self.qc else _f_raw diff --git a/AeroViz/rawDataReader/core/logger.py b/AeroViz/rawDataReader/core/logger.py new file mode 100644 index 0000000..acb0706 --- /dev/null +++ b/AeroViz/rawDataReader/core/logger.py @@ -0,0 +1,78 @@ +import logging +import re +import sys +from pathlib import Path + + +class ReaderLogger: + def __init__(self, name: str, log_path: Path): + self.name = 
name + self.log_path = log_path + + # ANSI color codes + self.CYAN = '\033[96m' + self.BLUE = '\033[94m' + self.GREEN = '\033[92m' + self.YELLOW = '\033[93m' + self.RED = '\033[91m' + self.RESET = '\033[0m' + + self.logger = self._setup_logger() + + def _setup_logger(self) -> logging.Logger: + logger = logging.getLogger(self.name) + logger.setLevel(logging.INFO) + + # Remove existing handlers + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + # clean ANSI formatter (for log file) + class CleanFormatter(logging.Formatter): + def format(self, record): + formatted_msg = super().format(record) + return re.sub(r'\033\[[0-9;]*m', '', formatted_msg) + + # Set up handlers + file_handler = logging.FileHandler(self.log_path / f'{self.name}.log') + file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')) + + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(logging.Formatter('%(message)s')) + + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + return logger + + def info(self, msg: str): + self.logger.info(msg) + + def warning(self, msg: str): + self.logger.warning(msg) + + def error(self, msg: str): + self.logger.error(msg) + + def info_box(self, text: str, color_part: str = None, width: int = 80): + """ + Create a boxed message with optional colored text + + Args: + text: Base text format (e.g., "Reading {} RAW DATA from {} to {}") + color_part: Part of text to be colored (e.g., "RAW DATA") + width: Box width + """ + display_text = text.replace(color_part, " " * len(color_part)) if color_part else text + + left_padding = " " * ((width - len(display_text)) // 2) + right_padding = " " * (width - len(display_text) - len(left_padding)) + + content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text + + __content__ = f"{left_padding}{content}{right_padding}" + + self.info(f"╔{'═' * width}╗") + 
self.info(f"║{__content__}║") + self.info(f"╚{'═' * width}╝") diff --git a/AeroViz/rawDataReader/script/Minion.py b/AeroViz/rawDataReader/script/Minion.py index 4190686..be5f0b9 100644 --- a/AeroViz/rawDataReader/script/Minion.py +++ b/AeroViz/rawDataReader/script/Minion.py @@ -149,7 +149,7 @@ def XRF_QAQC(self, columns_to_convert = [col for col in MDL.keys() if col in df.columns] df[columns_to_convert] = df[columns_to_convert].div(1000) - self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}") + self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}") return df @@ -206,9 +206,10 @@ def IGAC_QAQC(self, # 計算保留的数據的百分比 retained_percentage = (valid_mask.sum() / len(df)) * 100 - self.logger.info(f"Ions balance summary: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}") + self.logger.info( + f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}") if retained_percentage < 70: - self.logger.warning("Warning: The percentage of retained data is less than 70%") + self.logger.warning("\tWarning: The percentage of retained data is less than 70%") return df diff --git a/AeroViz/rawDataReader/script/SMPS.py b/AeroViz/rawDataReader/script/SMPS.py index 165c1cf..5620af7 100644 --- a/AeroViz/rawDataReader/script/SMPS.py +++ b/AeroViz/rawDataReader/script/SMPS.py @@ -57,7 +57,8 @@ def _raw_reader(self, file): _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()] if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]: - print(f'SMPS file: {file.name} is not match the default size range {self.size_range}.') + self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, ' + f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})') return _df_smps.apply(to_numeric, errors='coerce') @@ -76,8 +77,7 @@ def _QC(self, _df): _df = _df.mask(_df['total'] < 2000) # remove the bin over 400 nm which 
num. conc. larger than 4000 - _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.] - + _df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.] _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) return _df[_df.keys()[:-1]]