Skip to content

Commit

Permalink
refactor(logger): enhance progress bar visualization and formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex870521 committed Nov 6, 2024
1 parent afa393d commit 8dea894
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 80 deletions.
138 changes: 64 additions & 74 deletions AeroViz/rawDataReader/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
import logging
from abc import ABC, abstractmethod
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from typing import Optional
from typing import Optional, Generator

import numpy as np
import pandas as pd
Expand All @@ -12,6 +12,7 @@
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn

from AeroViz.rawDataReader.config.supported_instruments import meta
from AeroViz.rawDataReader.core.logger import ReaderLogger
from AeroViz.rawDataReader.core.qc import DataQualityControl

__all__ = ['AbstractReader']
Expand Down Expand Up @@ -40,7 +41,7 @@ def __init__(self,

self.path = Path(path)
self.meta = meta[self.nam]
self.logger = self._setup_logger()
self.logger = ReaderLogger(self.nam, self.path)

self.reset = reset
self.qc = qc
Expand Down Expand Up @@ -81,20 +82,6 @@ def _raw_reader(self, file):
def _QC(self, df: DataFrame) -> DataFrame:
    # Default quality-control hook: returns the data unchanged.
    # Instrument-specific reader subclasses override this to mask or drop
    # invalid records (see e.g. the SMPS reader's _QC) before rate
    # statistics are calculated.
    return df

def _setup_logger(self) -> logging.Logger:
logger = logging.getLogger(self.nam)
logger.setLevel(logging.INFO)

for handler in logger.handlers[:]:
handler.close()
logger.removeHandler(handler)

handler = logging.FileHandler(self.path / f'{self.nam}.log')
handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
logger.addHandler(handler)

return logger

def _rate_calculate(self, raw_data, qc_data) -> None:
def __base_rate(raw_data, qc_data):
period_size = len(raw_data.resample('1h').mean().index)
Expand All @@ -107,31 +94,27 @@ def __base_rate(raw_data, qc_data):

# validate rate calculation
if period_size == 0 or sample_size == 0 or qc_size == 0:
self.logger.warning(f'\t\t No data for this period... skipping')
print(f'\t\t\033[91m No data for this period... skipping\033[0m')
self.logger.warning(f'\t\t No data for this period... skip')
continue

if period_size < sample_size or sample_size < qc_size:
self.logger.warning(
f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping')
print(
f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping')
if period_size < sample_size:
self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip')
continue
if sample_size < qc_size:
self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip')
continue

else:
_acq_rate = round((sample_size / period_size) * 100, 1)
_yid_rate = round((qc_size / sample_size) * 100, 1)
_OEE_rate = round((qc_size / period_size) * 100, 1)

self.logger.info(f'{_nam}:')
self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
self.logger.info(f'\tYield rate: {_yid_rate}%')
self.logger.info(f'\tOEE rate: {_OEE_rate}%')
self.logger.info(f"{'=' * 60}")

print(f'\n\t{_nam} : ')
print(f'\t\tacquisition rate | yield rate -> OEE rate : '
f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')
_sample_rate = round((sample_size / period_size) * 100, 1)
_valid_rate = round((qc_size / sample_size) * 100, 1)
_total_rate = round((qc_size / period_size) * 100, 1)

self.logger.info(f"\t\t{self.logger.CYAN}{_nam}{self.logger.RESET}")
self.logger.info(
f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}")
self.logger.info(
f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}")
self.logger.info(
f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}")

if self.meta['deter_key'] is not None:
# use qc_freq to calculate each period rate
Expand All @@ -141,9 +124,8 @@ def __base_rate(raw_data, qc_data):

for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
self.logger.info(
f"\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
print(
f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
f"\t{self.logger.BLUE}▶ Processing: {_sub_raw_data.index[0].strftime('%F')}"
f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")

__base_rate(_sub_raw_data, _sub_qc_data)

Expand Down Expand Up @@ -207,6 +189,34 @@ def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
except Exception as e:
raise IOError(f"Error saving data. {e}")

@contextmanager
def progress_reading(self, files: list) -> Generator:
    """Display a rich progress bar while raw files are being read.

    Logger calls made inside the context are buffered and replayed after
    the bar closes, so log lines never interleave with the bar redraws.
    Yields the ``(progress, task)`` pair for the caller to advance.
    """
    levels = ('info', 'warning', 'error')
    buffered = {name: [] for name in levels}
    saved = {name: getattr(self.logger, name) for name in levels}

    # Divert each logger method into its buffer for the bar's lifetime
    for name in levels:
        setattr(self.logger, name, buffered[name].append)

    try:
        progress_bar = Progress(
            TextColumn("[bold blue]{task.description}", style="bold blue"),
            BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
            TaskProgressColumn(),
            TimeRemainingColumn(),
            TextColumn("{task.fields[filename]}", style="yellow"),
            console=Console(force_terminal=True, color_system="auto", width=120),
            expand=False
        )
        with progress_bar as progress:
            task = progress.add_task(f"▶ Reading {self.nam} files", total=len(files), filename="")
            yield progress, task
    finally:
        # Put the real logger methods back, then flush everything buffered
        for name in levels:
            setattr(self.logger, name, saved[name])
            for message in buffered[name]:
                saved[name](message)

def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
files = [f
for file_pattern in self.meta['pattern']
Expand All @@ -218,34 +228,22 @@ def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")

df_list = []
with Progress(
TextColumn("[bold blue]{task.description}", style="bold blue"),
BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"),
TaskProgressColumn(),
TimeRemainingColumn(),
TextColumn("{task.fields[filename]}", style="yellow"),
console=Console(force_terminal=True, color_system="auto"),
expand=False
) as progress:
task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")

# Context manager for progress bar display
with self.progress_reading(files) as (progress, task):
for file in files:
progress.update(task, advance=1, filename=file.name)
try:
df = self._raw_reader(file)

if df is not None and not df.empty:
if (df := self._raw_reader(file)) is not None and not df.empty:
df_list.append(df)
else:
self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")

except pd.errors.ParserError as e:
self.logger.error(f"Error tokenizing data: {e}")
self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.")

except Exception as e:
self.logger.error(f"Error reading {file.name}: {e}")

if not df_list:
raise ValueError("All files were either empty or failed to read.")
raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")

raw_data = concat(df_list, axis=0).groupby(level=0).first()

Expand All @@ -260,45 +258,37 @@ def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
def _run(self, user_start, user_end):
# read pickle if pickle file exists and 'reset=False' or process raw data or append new data
if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
f"from {user_start} to {user_end}\n")
self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")

_f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)

if self.append:
print(f"Appending new data from {user_start} to {user_end}")
self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")

_f_raw_new, _f_qc_new = self._read_raw_files()
_f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
_f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)

else:
_f_raw, _f_qc = _f_raw_done, _f_qc_done

return _f_qc if self.qc else _f_raw

else:
print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
f"from {user_start} to {user_end}\n")
self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA")

_f_raw, _f_qc = self._read_raw_files()

# process time index
data_start, data_end = _f_raw.index.sort_values()[[0, -1]]

_f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
_f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
_f_qc = self._outlier_process(_f_qc)

# save
self._save_data(_f_raw, _f_qc)

self.logger.info(f"{'=' * 60}")
self.logger.info(f"Raw data time : {data_start} to {data_end}")
self.logger.info(f"Output time : {user_start} to {user_end}")
self.logger.info(f"{'-' * 60}")

if self.rate:
_f_raw = _f_raw.apply(to_numeric, errors='coerce')
_f_qc = _f_qc.apply(to_numeric, errors='coerce')
self._rate_calculate(_f_raw, _f_qc)
self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce'))

return _f_qc if self.qc else _f_raw

Expand Down
78 changes: 78 additions & 0 deletions AeroViz/rawDataReader/core/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging
import re
import sys
from pathlib import Path


class ReaderLogger:
    """Logger wrapper for the raw-data readers.

    Writes every message twice: colored to stdout, and with all ANSI escape
    codes stripped to ``<name>.log`` inside *log_path*.
    """

    def __init__(self, name: str, log_path: Path):
        self.name = name
        self.log_path = log_path

        # ANSI color codes, exposed so callers can colorize console output
        self.CYAN = '\033[96m'
        self.BLUE = '\033[94m'
        self.GREEN = '\033[92m'
        self.YELLOW = '\033[93m'
        self.RED = '\033[91m'
        self.RESET = '\033[0m'

        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Build (or rebuild) the named logger with one file and one console handler."""
        logger = logging.getLogger(self.name)
        logger.setLevel(logging.INFO)
        # Do not bubble records up to the root logger: if the application has
        # configured root handlers, every line would otherwise appear twice.
        logger.propagate = False

        # getLogger returns a shared instance, so a re-created ReaderLogger
        # would stack handlers and log each message multiple times — drop any
        # handlers left over from a previous instance first.
        for handler in logger.handlers[:]:
            handler.close()
            logger.removeHandler(handler)

        # Formatter that strips ANSI escape codes so the log file stays clean
        class CleanFormatter(logging.Formatter):
            def format(self, record):
                formatted_msg = super().format(record)
                return re.sub(r'\033\[[0-9;]*m', '', formatted_msg)

        # File handler: timestamped, color-free. Explicit UTF-8 so the
        # box-drawing characters from info_box never hit a locale that
        # cannot encode them (e.g. cp1252 on Windows).
        file_handler = logging.FileHandler(self.log_path / f'{self.name}.log', encoding='utf-8')
        file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))

        # Console handler: raw message only, colors preserved
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(logging.Formatter('%(message)s'))

        logger.addHandler(file_handler)
        logger.addHandler(console_handler)

        return logger

    def info(self, msg: str):
        self.logger.info(msg)

    def warning(self, msg: str):
        self.logger.warning(msg)

    def error(self, msg: str):
        self.logger.error(msg)

    def info_box(self, text: str, color_part: str = None, width: int = 80):
        """
        Create a boxed message with optional colored text

        Args:
            text: Base text format (e.g., "Reading {} RAW DATA from {} to {}")
            color_part: Part of text to be colored (e.g., "RAW DATA")
            width: Box width
        """
        # Center on the plain-text length: the ANSI codes inserted below have
        # zero display width and must not be counted. Clamp to zero so text
        # wider than the box degrades gracefully instead of going negative.
        pad_total = max(width - len(text), 0)
        left_padding = " " * (pad_total // 2)
        right_padding = " " * (pad_total - len(left_padding))

        content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text

        self.info(f"╔{'═' * width}╗")
        self.info(f"║{left_padding}{content}{right_padding}║")
        self.info(f"╚{'═' * width}╝")
7 changes: 4 additions & 3 deletions AeroViz/rawDataReader/script/Minion.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def XRF_QAQC(self,
columns_to_convert = [col for col in MDL.keys() if col in df.columns]
df[columns_to_convert] = df[columns_to_convert].div(1000)

self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}")
self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}")

return df

Expand Down Expand Up @@ -206,9 +206,10 @@ def IGAC_QAQC(self,
# 計算保留的数據的百分比
retained_percentage = (valid_mask.sum() / len(df)) * 100

self.logger.info(f"Ions balance summary: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")
self.logger.info(
f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")

if retained_percentage < 70:
self.logger.warning("Warning: The percentage of retained data is less than 70%")
self.logger.warning("\tWarning: The percentage of retained data is less than 70%")

return df
6 changes: 3 additions & 3 deletions AeroViz/rawDataReader/script/SMPS.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def _raw_reader(self, file):
_df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]

if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]:
print(f'SMPS file: {file.name} is not match the default size range {self.size_range}.')
self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, '
f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})')

return _df_smps.apply(to_numeric, errors='coerce')

Expand All @@ -76,8 +77,7 @@ def _QC(self, _df):
_df = _df.mask(_df['total'] < 2000)

# remove the bin over 400 nm which num. conc. larger than 4000
_df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.]

_df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.]
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)

return _df[_df.keys()[:-1]]

0 comments on commit 8dea894

Please sign in to comment.