diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml index 5fd75af..5695c83 100644 --- a/.github/workflows/cleanup.yml +++ b/.github/workflows/cleanup.yml @@ -1,19 +1,93 @@ -# .gitHub/workflows/cleanup.yml -name: Cleanup Deployments +# .github/workflows/cleanup.yml +name: Repository Cleanup on: - workflow_dispatch: # allow manual trigger + workflow_dispatch: + inputs: + action_type: + description: 'Select the operation to run' + required: true + type: choice + options: + - 'Cleanup Workflow' + - 'Cleanup Deployments' + workflow_status: + description: 'Workflow status to clean up (only needed when Cleanup Workflow is selected)' + required: false + type: choice + options: + - 'disabled' # disabled workflows + - 'active' # active workflows + - 'all' # all workflows + environment: + description: 'Deployment environment to clean up (only needed when Cleanup Deployments is selected)' + required: false + type: choice + options: + - 'all' + - 'github-pages' + - 'pypi' jobs: - cleanup: + cleanup-workflows: + if: ${{ github.event.inputs.action_type == 'Cleanup Workflow' }} + runs-on: ubuntu-latest + permissions: + actions: write + steps: + - name: Cleanup workflows + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const status = '${{ github.event.inputs.workflow_status }}'; + console.log(`Cleaning up workflows with status: ${status}`); + + // fetch all workflows in the repository + const workflows = await github.rest.actions.listRepoWorkflows({ + owner: context.repo.owner, + repo: context.repo.repo + }); + + for (const workflow of workflows.data.workflows) { + // filter workflows by the selected status + if (status === 'all' || + (status === 'disabled' && workflow.state !== 'active') || + (status === 'active' && workflow.state === 'active')) { + + console.log(`Processing workflow: ${workflow.name} (${workflow.state})`); + + // fetch all runs of this workflow + const runs = await github.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: workflow.id, + }); + + // delete the runs + console.log(`Found ${runs.data.total_count} runs to delete`); + for (const run of runs.data.workflow_runs) { + console.log(`Deleting run #${run.run_number} of ${workflow.name}`); + await github.rest.actions.deleteWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: run.id + }); + } + } + } + console.log('Cleanup completed'); + + cleanup-deployments: + if: ${{ github.event.inputs.action_type == 'Cleanup Deployments' }} runs-on: ubuntu-latest permissions: deployments: write actions: write contents: write - steps: - name: Delete github-pages deployments + if: ${{ github.event.inputs.environment == 'github-pages' || github.event.inputs.environment == 'all' }} uses: strumwolf/delete-deployment-environment@v2 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -21,8 +95,9 @@ jobs: onlyRemoveDeployments: true - name: Delete pypi deployments + if: ${{ github.event.inputs.environment == 'pypi' || github.event.inputs.environment == 'all' }} uses: strumwolf/delete-deployment-environment@v2 with: token: ${{ secrets.GITHUB_TOKEN }} environment: pypi - onlyRemoveDeployments: true + onlyRemoveDeployments: true \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a91cd84..99602c0 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,54 +1,135 @@ -# .gitHub/workflows/publish.yml -name: Publish to PyPI +name: Publish AeroViz on: push: tags: - - 'v*' # triggered when a version tag is pushed, e.g. v0.1.0 + - 'v*' jobs: - build-and-publish: + build-and-test: + strategy: + matrix: + python-version: [ "3.11", "3.12" ] runs-on: ubuntu-latest - environment: - name: pypi steps:
- uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: ${{ matrix.python-version }} + cache: 'pip' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel build twine + pip install setuptools wheel build + pip install -e . + pip install -e ".[test]" - - name: Extract version from tag - id: get_version - run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV + - name: Run tests + run: | + pytest tests/ -m "not requires_data" - - name: Verify version matches + - name: Verify package version matches tag run: | - VERSION=$(python setup.py --version) - if [ "$VERSION" != "${{ env.VERSION }}" ]; then - echo "Version mismatch: Tag version (${{ env.VERSION }}) doesn't match package version ($VERSION)" + TAG_VERSION=${GITHUB_REF#refs/tags/v} + PACKAGE_VERSION=$(python setup.py --version) + + if [ "$PACKAGE_VERSION" != "$TAG_VERSION" ]; then + echo "Version mismatch:" + echo " - Tag version: $TAG_VERSION" + echo " - Package version: $PACKAGE_VERSION" exit 1 + else + echo "Version match: $TAG_VERSION" fi - name: Build package run: python -m build - - name: Publish to Test PyPI - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} - run: | - twine upload --repository testpypi dist/* + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions-${{ matrix.python-version }} + path: dist/ + + publish-test: + needs: build-and-test + runs-on: ubuntu-latest + environment: + name: test-pypi + url: https://test.pypi.org/p/AeroViz + permissions: + id-token: write + + steps: + # Download artifacts from Python 3.12 build only + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + publish-prod: + needs: publish-test + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/AeroViz + permissions: + id-token: write + + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: Create GitHub Release + needs: publish-prod + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ + + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v2.1.1 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + + - name: Create GitHub Release env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - twine upload dist/* \ No newline at end of file + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "Release ${{ github.ref_name }}" + + - name: Upload artifacts to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' \ No newline at end of file diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 3265745..99829c3 100644 
--- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -2,30 +2,48 @@ name: Python Tests on: push: - branches: [ main ] + branches: [ main, master ] pull_request: - branches: [ main ] + branches: [ main, master ] jobs: test: - runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.11", "3.12" ] + os: [ ubuntu-latest ] + + fail-fast: false + + runs-on: ${{ matrix.os }} steps: - name: Checkout code uses: actions/checkout@v4 - - name: Set up Python 3.12 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: ${{ matrix.python-version }} + cache: 'pip' # enable pip caching to speed up installs - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest - pip install -r requirements/requirements.txt pip install -e . + pip install -e ".[test]" - - name: Run tests + - name: Run tests with coverage run: | - pytest tests/test_aeroviz_import.py \ No newline at end of file + pytest tests/ -m "not requires_data" \ + --cov=AeroViz \ + --cov-report=term-missing \ + --cov-report=xml \ + -v + + - name: Upload coverage reports + uses: actions/upload-artifact@v4 + with: + name: coverage-report-${{ matrix.python-version }}-${{ github.sha }} + path: coverage.xml + if-no-files-found: error diff --git a/AeroViz/__init__.py b/AeroViz/__init__.py index 6142d50..8063006 100644 --- a/AeroViz/__init__.py +++ b/AeroViz/__init__.py @@ -2,13 +2,12 @@ from AeroViz import plot from AeroViz.dataProcess import DataProcess from AeroViz.rawDataReader import RawDataReader -from AeroViz.tools import DataBase, DataReader, DataClassifier +from AeroViz.tools import DataBase, DataClassifier __all__ = [ 'plot', 'RawDataReader', 'DataProcess', 'DataBase', - 'DataReader', 'DataClassifier' ] diff --git a/AeroViz/plot/optical/PyMieScatt_update.py b/AeroViz/dataProcess/Optical/PyMieScatt_update.py similarity index 98% rename from AeroViz/plot/optical/PyMieScatt_update.py rename to AeroViz/dataProcess/Optical/PyMieScatt_update.py index b2a9a35..d5e6a43 100644 --- a/AeroViz/plot/optical/PyMieScatt_update.py +++ b/AeroViz/dataProcess/Optical/PyMieScatt_update.py @@ -6,13 +6,6 @@ from scipy.special import jv, yv -def coerceDType(d): - if type(d) is not np.ndarray: - return np.array(d) - else: - return d - - def MieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ nMedium = nMedium.real @@ -271,8 +264,8 @@ def Mie_SD(m, wavelength, dp, ndp, nMedium=1.0, SMPS=True, interpolate=False, as nMedium = nMedium.real m /= nMedium wavelength /= nMedium - dp = coerceDType(dp) - ndp = coerceDType(ndp) + dp = np.array(dp) + ndp = np.array(ndp) _length = np.size(dp) Q_ext = np.zeros(_length) Q_sca = np.zeros(_length) @@ -373,8 +366,8 @@ def SF_SD(m, wavelength, dp, ndp, nMedium=1.0, minAngle=0, maxAngle=180, angular wavelength /= nMedium _steps = int(1 + (maxAngle - minAngle) / angularResolution) - ndp = coerceDType(ndp) - dp = coerceDType(dp) + ndp = np.array(ndp) + dp = np.array(dp) SL = np.zeros(_steps) SR = np.zeros(_steps) SU = np.zeros(_steps) diff --git a/AeroViz/plot/optical/mie_theory.py b/AeroViz/dataProcess/Optical/mie_theory.py similarity index 100% rename from AeroViz/plot/optical/mie_theory.py rename to AeroViz/dataProcess/Optical/mie_theory.py diff --git a/AeroViz/dataProcess/SizeDistr/prop.py b/AeroViz/dataProcess/SizeDistr/prop.py new file mode 100644 index 0000000..d55a8db --- /dev/null +++ b/AeroViz/dataProcess/SizeDistr/prop.py @@ -0,0 +1,62 @@ +import numpy
as np +from numpy import exp, log +from scipy.signal import find_peaks + + +def geometric(dp: np.ndarray, + dist: np.ndarray + ) -> tuple[float, float]: + """ Calculate the geometric mean and standard deviation. """ + + _gmd = (((dist * log(dp)).sum()) / dist.sum()) + + logdp_mesh, gmd_mesh = np.meshgrid(log(dp), _gmd) + _gsd = ((((logdp_mesh - gmd_mesh) ** 2) * dist).sum() / dist.sum()) ** .5 + + return exp(_gmd), exp(_gsd) + + +def contribution(dp: np.ndarray, + dist: np.ndarray + ) -> tuple[float, float, float]: + """ Calculate the relative contribution of each mode. """ + + ultra = dist[(dp >= 11.8) & (dp < 100)].sum() / dist.sum() + accum = dist[(dp >= 100) & (dp < 1000)].sum() / dist.sum() + coarse = dist[(dp >= 1000) & (dp < 2500)].sum() / dist.sum() + + return ultra, accum, coarse + + +def mode(dp: np.ndarray, + dist: np.ndarray + ) -> np.ndarray: + """ Find the peak modes of the distribution. """ + + min_value = np.array([dist.min()]) + peaks, _ = find_peaks(np.concatenate([min_value, dist, min_value]), distance=len(dist) - 1) + + return dp[peaks - 1] + + +def properties(dist, + dp: np.ndarray, + dlogdp: np.ndarray, + weighting: str + ) -> dict: + """ Row-wise helper intended for DataFrame.apply. """ + dist = np.array(dist) + + gmd, gsd = geometric(dp, dist) + ultra, accum, coarse = contribution(dp, dist) + peak = mode(dp, dist) + + return {key: round(value, 3) for key, value in + {f'total_{weighting}': (dist * dlogdp).sum(), + f'GMD_{weighting}': gmd, + f'GSD_{weighting}': gsd, + f'mode_{weighting}': peak[0], + f'ultra_{weighting}': ultra, + f'accum_{weighting}': accum, + f'coarse_{weighting}': coarse} + .items()} diff --git a/AeroViz/plot/__init__.py b/AeroViz/plot/__init__.py index 93df043..b5b648e 100644 --- a/AeroViz/plot/__init__.py +++ b/AeroViz/plot/__init__.py @@ -3,7 +3,6 @@ from .
import optical from .bar import bar from .box import box -from .hysplit import hysplit from .pie import pie, donuts from .radar import radar from .regression import linear_regression, multiple_linear_regression diff --git a/AeroViz/plot/hysplit/__init__.py b/AeroViz/plot/hysplit/__init__.py deleted file mode 100644 index 8b90a72..0000000 --- a/AeroViz/plot/hysplit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .hysplit import * diff --git a/AeroViz/plot/meteorology/meteorology.py b/AeroViz/plot/meteorology/CBPF.py similarity index 52% rename from AeroViz/plot/meteorology/meteorology.py rename to AeroViz/plot/meteorology/CBPF.py index 11b941c..193fb37 100644 --- a/AeroViz/plot/meteorology/meteorology.py +++ b/AeroViz/plot/meteorology/CBPF.py @@ -1,85 +1,159 @@ import math -from typing import Literal import matplotlib.pyplot as plt import numpy as np import pandas as pd -import windrose from matplotlib.pyplot import Figure, Axes from pandas import DataFrame, Series from scipy.ndimage import gaussian_filter from AeroViz.plot.utils import * -__all__ = ['wind_rose', - 'CBPF' - ] +__all__ = ['CBPF'] + + +def improve_density_estimation(df, WS, WD, val, resolution=100, bandwidth=None): + """ + Improved density estimation that uses a KDE to produce a smoother distribution + + Parameters: + ----------- + df : DataFrame + DataFrame containing wind speed and wind direction data + WS : str + wind speed column name + WD : str + wind direction column name + val : str + column name of the variable to analyze + resolution : int + grid resolution + bandwidth : float or tuple + KDE bandwidth parameter; chosen automatically if None + """ + from scipy.stats import gaussian_kde + import numpy as np + + # convert to Cartesian coordinates + u = df[WS] * np.sin(np.radians(df[WD])) + v = df[WS] * np.cos(np.radians(df[WD])) + + # build the grid + u_range = np.linspace(u.min(), u.max(), resolution) + v_range = np.linspace(v.min(), v.max(), resolution) + U, V = np.meshgrid(u_range, v_range) + + # prepare evaluation positions for the KDE + positions = np.vstack([U.ravel(), V.ravel()]) + values = np.vstack([u, v]) + + # density estimation with the KDE + kernel = gaussian_kde(values, bw_method=bandwidth) + Z = np.reshape(kernel(positions), U.shape) + + # normalize density values to the [0, 1] interval + Z = (Z - Z.min()) / (Z.max() - Z.min()) + + # apply a polar-coordinate mask + center_u = len(u_range) // 2 + center_v = len(v_range) // 2 + max_radius = min(center_u, center_v) + + Y, X = np.ogrid[-center_v:resolution - center_v, -center_u:resolution - center_u] + mask = X * X + Y * Y > max_radius * max_radius + Z[mask] = np.nan + + return Z, U, V + + +def smooth_and_clean(Z, smooth_radius=2, min_density=1): + """ + Smooth and clean the density map, removing isolated points + + Parameters: + ----------- + Z : ndarray + density estimation result + smooth_radius : int + smoothing radius + min_density : float + minimum density threshold + """ + from scipy.ndimage import gaussian_filter + + # Gaussian smoothing first + Z_smooth = gaussian_filter(Z, sigma=smooth_radius) + + # drop points below the threshold + # Z_smooth[Z_smooth < min_density] = np.nan + + # remove isolated points + rows, cols = Z_smooth.shape + for i in range(rows): + for j in range(cols): + if not np.isnan(Z_smooth[i, j]): + # check the neighboring points + neighborhood = Z_smooth[ + max(0, i - smooth_radius):min(rows, i + smooth_radius + 1), + max(0, j - smooth_radius):min(cols, j + smooth_radius + 1) + ] + if np.count_nonzero(~np.isnan(neighborhood)) < 1: # too few valid neighboring points + Z_smooth[i, j] = np.nan + + return Z_smooth + + +def is_within_circle(center_row, center_col, row, col, radius): + return np.sqrt((center_row - row) ** 2 + (center_col - col) ** 2) <= radius + + +def remove_lonely_point(filtered_histogram, radius=4, magic_num=13): + rows, cols = filtered_histogram.shape + data_positions = np.where(~np.isnan(filtered_histogram)) + + for row, col in zip(*data_positions): + valid_data_count = 0 + for i in range(max(0, row - radius), min(rows, row + radius + 1)): +
for j in range(max(0, col - radius), min(cols, col + radius + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): + if not np.isnan(filtered_histogram[i, j]): + valid_data_count += 1 + + if valid_data_count <= magic_num: + filtered_histogram[row, col] = np.nan + + return filtered_histogram + + +def fill_nan_with_mean(filtered_histogram, radius=4, magic_num=13): + rows, cols = filtered_histogram.shape + nan_positions = np.where(np.isnan(filtered_histogram)) + + for row, col in zip(*nan_positions): + surrounding_values = [] + surrounding_values_within_one = [] + nan_count = 0 + for i in range(max(0, row - radius), min(rows, row + radius + 1)): + for j in range(max(0, col - radius), min(cols, col + radius + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): + if np.isnan(filtered_histogram[i, j]): + nan_count += 1 + else: + surrounding_values.append(filtered_histogram[i, j]) -@set_figure(figsize=(4.3, 4)) -def wind_rose(df: DataFrame, - WS: Series | str, - WD: Series | str, - val: Series | str | None = None, - typ: Literal['bar', 'scatter'] = 'scatter', - rlabel_pos: float = 30, - **kwargs - ) -> tuple[Figure, Axes]: - # conditional bivariate probability function (cbpf) python - # https://davidcarslaw.github.io/openair/reference/polarPlot.html - # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R - windrose.WindroseAxes._info = 'WindroseAxes' - - df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])) - - radius = df[WS].to_numpy() - theta = df[WD].to_numpy() - radian = np.radians(theta) - values = df[val].to_numpy() if val is not None else None - - # In this case, the windrose is a simple frequency diagram, - # the function automatically calculates the radians of the given wind direction. - if typ == 'bar': - fig, ax = plt.subplots(figsize=(5.5, 4), subplot_kw={'projection': 'windrose'}) - fig.subplots_adjust(left=0) - - ax.bar(theta, radius, bins=[0, 1, 2, 3], normed=True, colors=['#0F1035', '#365486', '#7FC7D9', '#DCF2F1']) - ax.set( - ylim=(0, 30), - yticks=[0, 15, 30], - yticklabels=['', '15 %', '30 %'], - rlabel_position=rlabel_pos - ) - ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], - labels=["E", "NE", "N", "NW", "W", "SW", "S", "SE"]) - - ax.legend(units='m/s', bbox_to_anchor=[1.1, 0.5], loc='center left', ncol=1) - - # In this case, the windrose is a scatter plot, - # in contrary, this function does not calculate the radians, so user have to input the radian. 
- else: - fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={'projection': 'windrose'}) - fig.subplots_adjust(left=0) - - scatter = ax.scatter(radian, radius, s=15, c=values, vmax=np.quantile(values, 0.90), edgecolors='none', - cmap='jet', alpha=0.8) - ax.set( - ylim=(0, 7), - yticks=[1, 3, 5, 7], - yticklabels=['1 m/s', '3 m/s', '5 m/s', '7 m/s'], - rlabel_position=rlabel_pos, - theta_direction=-1, - theta_zero_location='N', - title=kwargs.get('title', None) - ) - ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], - labels=["N", "NE", "E", "SE", "S", "SW", "W", "NW"]) - - plt.colorbar(scatter, ax=ax, label=Unit(val), pad=0.1, fraction=0.04) + for i in range(max(0, row - 2), min(rows, row + 2 + 1)): + for j in range(max(0, col - 2), min(cols, col + 2 + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, 2): + if np.isnan(filtered_histogram[i, j]): + pass + else: + surrounding_values_within_one.append(filtered_histogram[i, j]) - plt.show() + if nan_count < magic_num and surrounding_values_within_one: + filtered_histogram[row, col] = np.mean(surrounding_values) - return fig, ax + return filtered_histogram # TODO: fix the bug of the CBPF function @@ -90,7 +164,7 @@ def CBPF(df: DataFrame, val: Series | str | None = None, percentile: list | float | int | None = None, max_ws: float | None = 5, - resolution: int = 100, + resolution: int = 50, sigma: float | tuple = 2, rlabel_pos: float = 30, bottom_text: str | bool | None = None, @@ -157,64 +231,18 @@ def CBPF(df: DataFrame, histogram_filled = np.nan_to_num(histogram, nan=0) # replace NaN with 0 filtered_histogram = gaussian_filter(histogram_filled, sigma=sigma) - filtered_histogram[np.isnan(histogram)] = np.nan - - def is_within_circle(center_row, center_col, row, col, radius): - return np.sqrt((center_row - row) ** 2 + (center_col - col) ** 2) <= radius - - def remove_lonely_point(filtered_histogram, radius=4, magic_num=13): - rows, cols = filtered_histogram.shape - data_positions = np.where(~np.isnan(filtered_histogram)) - - for row, col in zip(*data_positions): - valid_data_count = 0 - for i in range(max(0, row - radius), min(rows, row + radius + 1)): - for j in range(max(0, col - radius), min(cols, col + radius + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): - if not np.isnan(filtered_histogram[i, j]): - valid_data_count += 1 - - if valid_data_count <= magic_num: - filtered_histogram[row, col] = np.nan - - return filtered_histogram - - def fill_nan_with_mean(filtered_histogram, radius=4, magic_num=13): - rows, cols = filtered_histogram.shape - nan_positions = np.where(np.isnan(filtered_histogram)) - - for row, col in zip(*nan_positions): - surrounding_values = [] - surrounding_values_within_one = [] - nan_count = 0 - - for i in range(max(0, row - radius), min(rows, row + radius + 1)): - for j in range(max(0, col - radius), min(cols, col + radius + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): - if np.isnan(filtered_histogram[i, j]): - nan_count += 1 - else: - surrounding_values.append(filtered_histogram[i, j]) - - for i in range(max(0, row - 2), min(rows, row + 2 + 1)): - for j in range(max(0, col - 2), min(cols, col + 2 + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, 2): - if np.isnan(filtered_histogram[i, j]): - pass - else: - surrounding_values_within_one.append(filtered_histogram[i, j]) - - if nan_count < magic_num and surrounding_values_within_one: - filtered_histogram[row, col] = np.mean(surrounding_values) - - return
filtered_histogram + # filtered_histogram[np.isnan(histogram)] = np.nan + # breakpoint() + # filtered_histogram = smooth_and_clean(filtered_histogram) # Apply the function to your data - fil_radius, magic_num = 3, 13 - filtered_histogram = remove_lonely_point(filtered_histogram, fil_radius, magic_num) - filtered_histogram = fill_nan_with_mean(filtered_histogram, fil_radius, magic_num) + # fil_radius, magic_num = 3, 13 + # filtered_histogram = remove_lonely_point(filtered_histogram, fil_radius, magic_num) + # filtered_histogram = fill_nan_with_mean(filtered_histogram, fil_radius, magic_num) + if np.all(np.isnan(filtered_histogram)): raise ValueError("All values in the filtered histogram are NaN. Please decrease the resolution.") + # plot fig, ax = plt.subplots() fig.subplots_adjust(left=0) diff --git a/AeroViz/plot/meteorology/__init__.py b/AeroViz/plot/meteorology/__init__.py index d2a0d9f..de5ea1f 100644 --- a/AeroViz/plot/meteorology/__init__.py +++ b/AeroViz/plot/meteorology/__init__.py @@ -1 +1,3 @@ -from .meteorology import * +from .CBPF import CBPF +from .hysplit import hysplit +from .wind_rose import wind_rose diff --git a/AeroViz/plot/hysplit/hysplit.py b/AeroViz/plot/meteorology/hysplit.py similarity index 100% rename from AeroViz/plot/hysplit/hysplit.py rename to AeroViz/plot/meteorology/hysplit.py diff --git a/AeroViz/plot/meteorology/wind_rose.py b/AeroViz/plot/meteorology/wind_rose.py new file mode 100644 index 0000000..6ffc58b --- /dev/null +++ b/AeroViz/plot/meteorology/wind_rose.py @@ -0,0 +1,77 @@ +from typing import Literal + +import matplotlib.pyplot as plt +import numpy as np +import windrose +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame, Series + +from AeroViz.plot.utils import * + +__all__ = ['wind_rose'] + + +@set_figure(figsize=(4.3, 4)) +def wind_rose(df: DataFrame, + WS: Series | str, + WD: Series | str, + val: Series | str | None = None, + typ: Literal['bar', 'scatter'] = 'scatter', + rlabel_pos: float = 30, + **kwargs + ) -> tuple[Figure, Axes]: + # conditional bivariate probability function (cbpf) python + # https://davidcarslaw.github.io/openair/reference/polarPlot.html + # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R + windrose.WindroseAxes._info = 'WindroseAxes' + + df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])) + + radius = df[WS].to_numpy() + theta = df[WD].to_numpy() + radian = np.radians(theta) + values = df[val].to_numpy() if val is not None else None + + # In this case, the windrose is a simple frequency diagram, + # the function automatically calculates the radians of the given wind direction. + if typ == 'bar': + fig, ax = plt.subplots(figsize=(5.5, 4), subplot_kw={'projection': 'windrose'}) + fig.subplots_adjust(left=0) + + ax.bar(theta, radius, bins=[0, 1, 2, 3], normed=True, colors=['#0F1035', '#365486', '#7FC7D9', '#DCF2F1']) + ax.set( + ylim=(0, 30), + yticks=[0, 15, 30], + yticklabels=['', '15 %', '30 %'], + rlabel_position=rlabel_pos + ) + ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], + labels=["E", "NE", "N", "NW", "W", "SW", "S", "SE"]) + + ax.legend(units='m/s', bbox_to_anchor=[1.1, 0.5], loc='center left', ncol=1) + + # In this case, the windrose is a scatter plot; + # by contrast, this function does not calculate radians, so the user must supply values in radians.
+ else: + fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={'projection': 'windrose'}) + fig.subplots_adjust(left=0) + + scatter = ax.scatter(radian, radius, s=15, c=values, vmax=np.quantile(values, 0.90), edgecolors='none', + cmap='jet', alpha=0.8) + ax.set( + ylim=(0, 7), + yticks=[1, 3, 5, 7], + yticklabels=['1 m/s', '3 m/s', '5 m/s', '7 m/s'], + rlabel_position=rlabel_pos, + theta_direction=-1, + theta_zero_location='N', + title=kwargs.get('title', None) + ) + ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], + labels=["N", "NE", "E", "SE", "S", "SW", "W", "NW"]) + + plt.colorbar(scatter, ax=ax, label=Unit(val), pad=0.1, fraction=0.04) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/optical/optical.py b/AeroViz/plot/optical/optical.py index 65d519e..7b606be 100644 --- a/AeroViz/plot/optical/optical.py +++ b/AeroViz/plot/optical/optical.py @@ -3,11 +3,10 @@ import matplotlib.pyplot as plt import numpy as np -# from PyMieScatt import ScatteringFunction from matplotlib.pyplot import Figure, Axes -from AeroViz.plot.optical.PyMieScatt_update import ScatteringFunction -from AeroViz.plot.optical.mie_theory import Mie_Q, Mie_MEE, Mie_PESD +from AeroViz.dataProcess.Optical.PyMieScatt_update import ScatteringFunction +from AeroViz.dataProcess.Optical.mie_theory import Mie_Q, Mie_MEE, Mie_PESD from AeroViz.plot.utils import * __all__ = ['Q_plot', diff --git a/AeroViz/plot/utils/plt_utils.py b/AeroViz/plot/utils/plt_utils.py index b8119d9..ae79ebb 100644 --- a/AeroViz/plot/utils/plt_utils.py +++ b/AeroViz/plot/utils/plt_utils.py @@ -50,7 +50,7 @@ def wrapper(*args, **kwargs): plt.rcParams['legend.labelspacing'] = 0.7 plt.rcParams['figure.figsize'] = figsize or (4, 4) - plt.rcParams['figure.dpi'] = 200 + plt.rcParams['figure.dpi'] = 300 plt.rcParams['figure.autolayout'] = autolayout if not autolayout: diff --git a/AeroViz/rawDataReader/__init__.py b/AeroViz/rawDataReader/__init__.py index f22a54e..a2550d8 100644 --- a/AeroViz/rawDataReader/__init__.py +++ b/AeroViz/rawDataReader/__init__.py @@ -1,5 +1,6 @@ from datetime import datetime from pathlib import Path +from typing import Any from pandas import Grouper, Timedelta @@ -25,7 +26,7 @@ def RawDataReader(instrument_name: str, end: datetime = None, mean_freq: str = '1h', csv_out: bool = True, - ): + **kwargs: Any): """ Factory function to instantiate the appropriate reader module for a given instrument and return the processed data over the specified time range. 
@@ -107,7 +108,8 @@ def RawDataReader(instrument_name: str, qc=qc, qc_freq=qc_freq, rate=rate, - append_data=append_data + append_data=append_data, + **kwargs ) return reader_module( start=start, diff --git a/AeroViz/rawDataReader/core/__init__.py b/AeroViz/rawDataReader/core/__init__.py index 000ccfd..c2a7baf 100644 --- a/AeroViz/rawDataReader/core/__init__.py +++ b/AeroViz/rawDataReader/core/__init__.py @@ -1,9 +1,9 @@ import json -import logging from abc import ABC, abstractmethod +from contextlib import contextmanager from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Optional, Generator import numpy as np import pandas as pd @@ -12,6 +12,7 @@ from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn from AeroViz.rawDataReader.config.supported_instruments import meta +from AeroViz.rawDataReader.core.logger import ReaderLogger from AeroViz.rawDataReader.core.qc import DataQualityControl __all__ = ['AbstractReader'] @@ -35,11 +36,12 @@ def __init__(self, qc: bool = True, qc_freq: Optional[str] = None, rate: bool = True, - append_data: bool = False): + append_data: bool = False, + **kwargs): self.path = Path(path) self.meta = meta[self.nam] - self.logger = self._setup_logger() + self.logger = ReaderLogger(self.nam, self.path) self.reset = reset self.qc = qc @@ -53,6 +55,8 @@ def __init__(self, self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv' self.csv_out = self.path / f'output_{self.nam.lower()}.csv' + self.size_range = kwargs.get('size_range', (11.8, 593.5)) + def __call__(self, start: datetime, end: datetime, @@ -78,20 +82,6 @@ def _raw_reader(self, file): def _QC(self, df: DataFrame) -> DataFrame: return df - def _setup_logger(self) -> logging.Logger: - logger = logging.getLogger(self.nam) - logger.setLevel(logging.INFO) - - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - handler = logging.FileHandler(self.path / f'{self.nam}.log') - handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')) - logger.addHandler(handler) - - return logger - def _rate_calculate(self, raw_data, qc_data) -> None: def __base_rate(raw_data, qc_data): period_size = len(raw_data.resample('1h').mean().index) @@ -104,28 +94,27 @@ def __base_rate(raw_data, qc_data): # validate rate calculation if period_size == 0 or sample_size == 0 or qc_size == 0: - print(f'\t\t\033[91m No data for this period... skipping\033[0m') + self.logger.warning(f'\t\t No data for this period... skip') continue - - if period_size < sample_size or sample_size < qc_size: - print( - f'\t\tInvalid size relationship: period={period_size}, sample={sample_size}, QC={qc_size}... skipping') + if period_size < sample_size: + self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip') + continue + if sample_size < qc_size: + self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... 
skip') continue else: - _acq_rate = round((sample_size / period_size) * 100, 1) - _yid_rate = round((qc_size / sample_size) * 100, 1) - _OEE_rate = round((qc_size / period_size) * 100, 1) - - self.logger.info(f'{_nam}:') - self.logger.info(f"\tAcquisition rate: {_acq_rate}%") - self.logger.info(f'\tYield rate: {_yid_rate}%') - self.logger.info(f'\tOEE rate: {_OEE_rate}%') - self.logger.info(f"{'=' * 60}") - - print(f'\n\t{_nam} : ') - print(f'\t\tacquisition rate | yield rate -> OEE rate : ' - f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m') + _sample_rate = round((sample_size / period_size) * 100, 1) + _valid_rate = round((qc_size / sample_size) * 100, 1) + _total_rate = round((qc_size / period_size) * 100, 1) + + self.logger.info(f"\t\t{self.logger.CYAN}▶ {_nam}{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}") if self.meta['deter_key'] is not None: # use qc_freq to calculate each period rate @@ -135,9 +124,8 @@ def __base_rate(raw_data, qc_data): for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped): self.logger.info( - f"\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}") - print( - f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}") + f"\t{self.logger.BLUE}▶ Processing: {_sub_raw_data.index[0].strftime('%F')}" + f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}") __base_rate(_sub_raw_data, _sub_qc_data) @@ -201,6 +189,34 @@ def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None: except Exception as e: raise IOError(f"Error saving data. {e}") + @contextmanager + def progress_reading(self, files: list) -> Generator: + # Create message temporary storage and replace logger method + logs = {level: [] for level in ['info', 'warning', 'error']} + original = {level: getattr(self.logger, level) for level in logs} + + for level, msgs in logs.items(): + setattr(self.logger, level, msgs.append) + + try: + with Progress( + TextColumn("[bold blue]{task.description}", style="bold blue"), + BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"), + TaskProgressColumn(), + TimeRemainingColumn(), + TextColumn("{task.fields[filename]}", style="yellow"), + console=Console(force_terminal=True, color_system="auto", width=120), + expand=False + ) as progress: + task = progress.add_task(f"▶ Reading {self.nam} files", total=len(files), filename="") + yield progress, task + finally: + # Restore logger method and output message + for level, msgs in logs.items(): + setattr(self.logger, level, original[level]) + for msg in msgs: + original[level](msg) + def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: files = [f for file_pattern in self.meta['pattern'] @@ -212,37 +228,28 @@ def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: raise FileNotFoundError(f"No files in '{self.path}' could be read. 
Please check the current path.") df_list = [] - with Progress( - TextColumn("[bold blue]{task.description}", style="bold blue"), - BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"), - TaskProgressColumn(), - TimeRemainingColumn(), - TextColumn("{task.fields[filename]}", style="yellow"), - console=Console(force_terminal=True, color_system="auto"), - expand=False - ) as progress: - task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="") + + # Context manager for progress bar display + with self.progress_reading(files) as (progress, task): for file in files: progress.update(task, advance=1, filename=file.name) try: - df = self._raw_reader(file) - - if df is not None and not df.empty: + if (df := self._raw_reader(file)) is not None and not df.empty: df_list.append(df) else: - self.logger.warning(f"File {file.name} produced an empty DataFrame or None.") - - except pd.errors.ParserError as e: - self.logger.error(f"Error tokenizing data: {e}") + self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.") except Exception as e: self.logger.error(f"Error reading {file.name}: {e}") if not df_list: - raise ValueError("All files were either empty or failed to read.") + raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m") raw_data = concat(df_list, axis=0).groupby(level=0).first() + if self.nam == 'SMPS': + raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float)) + raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True) qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True) @@ -251,29 +258,28 @@ def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: def _run(self, user_start, user_end): # read pickle if pickle file exists and 'reset=False' or process raw data or append new data if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset: - print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m " - f"from {user_start} to {user_end}\n") + self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE") _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam) if self.append: - print(f"Appending new data from {user_start} to {user_end}") + self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data") + _f_raw_new, _f_qc_new = self._read_raw_files() _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new) _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new) + else: _f_raw, _f_qc = _f_raw_done, _f_qc_done + return _f_qc if self.qc else _f_raw else: - print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m " - f"from {user_start} to {user_end}\n") + self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA") _f_raw, _f_qc = self._read_raw_files() # process time index - data_start, data_end = _f_raw.index.sort_values()[[0, -1]] - _f_raw = self._timeIndex_process(_f_raw, user_start, user_end) _f_qc = self._timeIndex_process(_f_qc, user_start, user_end) _f_qc = self._outlier_process(_f_qc) @@ -281,15 +287,8 @@ def _run(self, user_start, user_end): # save self._save_data(_f_raw, _f_qc) - self.logger.info(f"{'=' * 60}") - self.logger.info(f"Raw data time : {data_start} to {data_end}") - self.logger.info(f"Output time : {user_start} to {user_end}") - 
self.logger.info(f"{'-' * 60}") - if self.rate: - _f_raw = _f_raw.apply(to_numeric, errors='coerce') - _f_qc = _f_qc.apply(to_numeric, errors='coerce') - self._rate_calculate(_f_raw, _f_qc) + self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce')) return _f_qc if self.qc else _f_raw diff --git a/AeroViz/rawDataReader/core/logger.py b/AeroViz/rawDataReader/core/logger.py new file mode 100644 index 0000000..acb0706 --- /dev/null +++ b/AeroViz/rawDataReader/core/logger.py @@ -0,0 +1,78 @@ +import logging +import re +import sys +from pathlib import Path + + +class ReaderLogger: + def __init__(self, name: str, log_path: Path): + self.name = name + self.log_path = log_path + + # ANSI color codes + self.CYAN = '\033[96m' + self.BLUE = '\033[94m' + self.GREEN = '\033[92m' + self.YELLOW = '\033[93m' + self.RED = '\033[91m' + self.RESET = '\033[0m' + + self.logger = self._setup_logger() + + def _setup_logger(self) -> logging.Logger: + logger = logging.getLogger(self.name) + logger.setLevel(logging.INFO) + + # Remove existing handlers + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + # clean ANSI formatter (for log file) + class CleanFormatter(logging.Formatter): + def format(self, record): + formatted_msg = super().format(record) + return re.sub(r'\033\[[0-9;]*m', '', formatted_msg) + + # Set up handlers + file_handler = logging.FileHandler(self.log_path / f'{self.name}.log') + file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')) + + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(logging.Formatter('%(message)s')) + + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + return logger + + def info(self, msg: str): + self.logger.info(msg) + + def warning(self, msg: str): + self.logger.warning(msg) + + def error(self, msg: str): + self.logger.error(msg) + + def info_box(self, text: str, color_part: str = None, width: int = 80): + """ + Create a boxed message with optional colored text + + Args: + text: Base text format (e.g., "Reading {} RAW DATA from {} to {}") + color_part: Part of text to be colored (e.g., "RAW DATA") + width: Box width + """ + display_text = text.replace(color_part, " " * len(color_part)) if color_part else text + + left_padding = " " * ((width - len(display_text)) // 2) + right_padding = " " * (width - len(display_text) - len(left_padding)) + + content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text + + __content__ = f"{left_padding}{content}{right_padding}" + + self.info(f"╔{'═' * width}╗") + self.info(f"║{__content__}║") + self.info(f"╚{'═' * width}╝") diff --git a/AeroViz/rawDataReader/script/EPA.py b/AeroViz/rawDataReader/script/EPA.py index 87d63f4..238e757 100644 --- a/AeroViz/rawDataReader/script/EPA.py +++ b/AeroViz/rawDataReader/script/EPA.py @@ -18,7 +18,7 @@ def _raw_reader(self, file): on_bad_lines='skip') if len(df.groupby('測站')) > 1: - raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}') + raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}") else: if '測站' in df.columns: df.drop(columns=['測站'], inplace=True) diff --git a/AeroViz/rawDataReader/script/Minion.py b/AeroViz/rawDataReader/script/Minion.py index 4190686..be5f0b9 100644 --- a/AeroViz/rawDataReader/script/Minion.py +++ b/AeroViz/rawDataReader/script/Minion.py @@ -149,7 +149,7 @@ def XRF_QAQC(self, columns_to_convert = [col for 
col in MDL.keys() if col in df.columns] df[columns_to_convert] = df[columns_to_convert].div(1000) - self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}") + self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}") return df @@ -206,9 +206,10 @@ def IGAC_QAQC(self, # calculate the percentage of retained data retained_percentage = (valid_mask.sum() / len(df)) * 100 - self.logger.info(f"Ions balance summary: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}") + self.logger.info( + f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}") if retained_percentage < 70: - self.logger.warning("Warning: The percentage of retained data is less than 70%") + self.logger.warning("\tWarning: The percentage of retained data is less than 70%") return df diff --git a/AeroViz/rawDataReader/script/SMPS.py b/AeroViz/rawDataReader/script/SMPS.py index 00de43d..5620af7 100644 --- a/AeroViz/rawDataReader/script/SMPS.py +++ b/AeroViz/rawDataReader/script/SMPS.py @@ -1,7 +1,7 @@ import csv import numpy as np -from pandas import to_datetime, to_numeric, read_csv, isna +from pandas import to_datetime, to_numeric, read_csv from AeroViz.rawDataReader.core import AbstractReader @@ -40,7 +40,7 @@ def _raw_reader(self, file): for date_format in date_formats: _time_index = parse_date(_df, date_format) - if not isna(_time_index).all(): + if not _time_index.isna().all(): break else: raise ValueError("Unable to parse dates with given formats") @@ -56,14 +56,17 @@ def _raw_reader(self, file): _df_smps.columns = _df_smps.columns.astype(float) _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()] - if _df_smps.columns[0] != 11.8: - print(f'file_name: {file.name}') - return None + if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]: + self.logger.info(f'\tSMPS file: {file.name} does not match the default size range {self.size_range}, ' + f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})') return _df_smps.apply(to_numeric, errors='coerce') # QC data def _QC(self, _df): + size_range_mask = (_df.columns.astype(float) >= self.size_range[0]) & ( + _df.columns.astype(float) <= self.size_range[1]) + _df = _df.loc[:, size_range_mask] # mask out the data size lower than 7 _df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean() @@ -74,8 +77,7 @@ def _QC(self, _df): _df = _df.mask(_df['total'] < 2000) # remove the bin over 400 nm which num. conc. larger than 4000 - _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.] - + _df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.] _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) return _df[_df.keys()[:-1]] diff --git a/AeroViz/tools/__init__.py b/AeroViz/tools/__init__.py index f917fe6..3b64717 100644 --- a/AeroViz/tools/__init__.py +++ b/AeroViz/tools/__init__.py @@ -1,3 +1,2 @@ from .database import DataBase from .dataclassifier import DataClassifier -from .datareader import DataReader diff --git a/AeroViz/tools/datareader.py b/AeroViz/tools/datareader.py deleted file mode 100644 index 7ead768..0000000 --- a/AeroViz/tools/datareader.py +++ /dev/null @@ -1,66 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path - -from pandas import read_csv, read_json, read_excel, DataFrame - - -class FileHandler(ABC): - """ An abstract base class for reading data files with different extensions (.csv, .json, .xls, .xlsx).
""" - - @abstractmethod - def read_data(self, file_path: Path) -> DataFrame: - pass - - -class CsvFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True, - low_memory=False) - - -class JsonFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_json(file_path) - - -class ExcelFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_excel(file_path, index_col=0, parse_dates=True, ) - - -class DataReaderFactory: - _handler_mapping = { - '.csv': CsvFileHandler(), - '.json': JsonFileHandler(), - '.xls': ExcelFileHandler(), - '.xlsx': ExcelFileHandler(), - } - - @staticmethod - def create_handler(file_extension: str) -> FileHandler: - reader_class = DataReaderFactory._handler_mapping.get(file_extension) - if reader_class is None: - raise ValueError(f"Unsupported file format: {file_extension}") - return reader_class - - -class DataReader: - """ - A class for reading data files with different extensions (.csv, .json, .xls, .xlsx). - - Parameters - ---------- - filename (Path | str): The name of the file to be read or the Path of the file. - - Returns - ------- - pandas.DataFrame: data - - Examples - -------- - >>> psd = DataReader(Path(...)) - """ - - def __new__(cls, file_path: Path | str) -> DataFrame: - file_path = Path(file_path) - return DataReaderFactory.create_handler(file_path.suffix.lower()).read_data(file_path) diff --git a/README.md b/README.md index 729a4d5..2486ae5 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ ##
AeroViz for Aerosol Science Visualization
-

+

- Static Badge - Static Badge - Static Badge - Static Badge +![Python](https://img.shields.io/pypi/pyversions/aeroviz?logo=python) +![PyPI](https://img.shields.io/pypi/v/aeroviz?logo=pypi) +![Pytest](https://img.shields.io/github/actions/workflow/status/Alex870521/aeroviz/pytest.yml?logo=pytest&label=pytest) +![GitHub last commit](https://img.shields.io/github/last-commit/Alex870521/aeroviz?logo=github) -

+
@@ -16,26 +16,41 @@ Alex870521 LinkedIn Alex870521 Medium -
+##
Installation
+```bash +pip install AeroViz +``` + ##
Key Features
-* Data Reading: Supports reading multiple aerosol data formats. -* Data Visualization: Offers various charts and graphs, including time series plots, distribution plots, and correlation - matrices. -* Data Processing: Includes multiple data processing tools, such as linear regression and Mie theory calculations. +### 📊 Data Reading ▶ RawDataReader +Built-in `RawDataReader` supporting multiple aerosol instruments: +- **Particle Sizers**: SMPS, APS, GRIMM, OPC +- **Mass & Optical**: TEOM, NEPH, Aurora, AE33/43, BC1054 +- **Chemical Analysis**: OCEC, IGAC, XRF, VOC -##
Installation
+> Features include quality control, data filtering, flexible resampling, and CSV export. For detailed instrument support +> and usage, check our [RawDataReader Guide](docs/guide/RawDataReader.md). -```bash -pip install AeroViz -``` +### 🔬 Data Processing ▶ DataProcess -For Windows users: Run `install_windows.bat` +Built-in `DataProcess` provides advanced aerosol analysis: +- **Size Distribution**: Mode Fitting, Log-Normal Analysis +- **Optical Properties**: Mie Theory, SOAP Calculation +- **Chemical**: Mass Closure, Source Apportionment +- **VOC**: OFP, SOAP -For Linux and Mac users: Run `install_unix.bat` +### 📈 Data Visualization ▶ plot + +Comprehensive visualization tools `plot`: +- **Time Analysis**: Trends, Diurnal Patterns +- **Statistical**: Distributions, Correlations +- **Specialized**: Size Contours, Wind Rose, Polar Plots, Hysplit, CBPF + +> **Note:** We are continuously adding support for more instruments and features. Contributions are welcome! ##
Quick Start
@@ -51,56 +66,10 @@ data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), plot.timeseries(data, y='scattering_coefficient') ``` -For more detailed usage instructions, please refer to our [User Guide](). - -##
RawDataReader - -RawDataReader supports a wide range of aerosol instruments, including NEPH, SMPS, AE33, and many more. It handles -various file types and time resolutions, making data processing efficient and standardized. +For more detailed usage instructions, please refer to our [User Guide](docs/guide). -For a detailed list of supported instruments, file types, and data columns, please refer to -our [RawDataReader Usage Guide](docs/guide/RawDataReader) in the `docs` folder. - -### Key Features: - -- Supports multiple aerosol instruments -- Applies customizable quality control measures -- Offers flexible data filtering and resampling options -- Enables easy data export to CSV format - -### Supported Instruments - -The AeroViz project currently supports data from the following instruments: - -- SMPS (Scanning Mobility Particle Sizer) -- APS (Aerodynamic Particle Sizer) -- GRIMM (GRIMM Aerosol Technik) -- TEOM (Continuous Ambient Particulate Monitor) -- NEPH (Nephelometer) -- Aurora (Nephelometer) -- AE33 (Aethalometer Model 33) -- AE43 (Aethalometer Model 43) -- BC1054 (Black Carbon Monitor 1054) -- MA350 (MicroAeth MA350) -- OCEC (Organic Carbon Elemental Carbon Analyzer) -- IGAC (In-situ Gas and Aerosol Compositions monitor) -- XRF (X-ray Fluorescence Spectrometer) -- VOC (Volatile Organic Compounds Monitor) - -> **Note:** We are continuously working to support more instruments. Please check back for updates or contribute to our -> project on GitHub. - -##
DataProcess
- -The AeroViz project currently supports the following processing methods: - -- **Chemistry**: -- **Optical** -- **SizeDistr** -- **VOC** ##
Documentation
- For detailed documentation, please refer to the `docs` folder, which includes:
@@ -108,18 +77,10 @@ For detailed documentation, please refer to the `docs` folder, which includes: | Documentation | Description | |--------------------------------|--------------------------| | [User Guide](docs/guide) | Basic usage instructions | -| [Changelog](docs/changelog.md) | List of changes | - +| [Changelog](docs/CHANGELOG.md) | List of changes |
-##
Related Source
- -* #### [PyMieScatt](https://github.com/bsumlin/PyMieScatt.git) -* #### [py-smps](https://github.com/quant-aq/py-smps.git) -* #### [ContainerHandle](https://github.com/yrr-Su/ContainerHandle.git) - ##
Contact
- For bug reports and feature requests please visit [GitHub Issues](https://github.com/Alex870521/DataPlot/issues).
@@ -129,6 +90,4 @@ For bug reports and feature requests please visit [GitHub Issues](https://github Alex870521 LinkedIn Alex870521 Medium - -
\ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..a225887 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,12 @@ +## v0.1.9.6 (2024-11-07) + +### Fix + +- set SMPS default size range (11.8, 593.5) + +### Refactor + +- **logger**: enhance progress bar visualization and formatting +- minor syntax improvements + +## v0.1.9.5 (2024-10-24) diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/guide/RawDataReader.md b/docs/guide/RawDataReader.md index 784ece8..ec5d393 100644 --- a/docs/guide/RawDataReader.md +++ b/docs/guide/RawDataReader.md @@ -140,25 +140,26 @@ This will display the first few rows of the processed data, including timestamps - `csv_out`: If True, output processed data as CSV # Supported Instruments: Default Time Resolutions and File Types - -| Instrument | Time Resolution | File Type | Display Columns | QAQC method | -|:----------:|:---------------:|:------------|-------------------------------------------------------|:-----------:| -| NEPH | 5min | .dat | G | default | -| Aurora | 1min | .csv | G | default | -| SMPS | 6min | .txt, .csv | all | default | -| GRIMM | 6min | .dat | all | default | -| APS_3321 | 6min | .txt | all | default | -| AE33 | 1min | .dat | BC6 | default | -| AE43 | 1min | .dat | BC6 | default | -| BC1054 | 1min | .csv | BC9 | default | -| MA350 | 1min | .csv | BC5 | default | -| TEOM | 6min | .csv | PM_Total, PM_NV | default | -| OCEC | 1h | *LCRes.csv | Thermal_OC, Thermal_EC, Optical_OC, Optical_EC | default | -| IGAC | 1h | .csv | Na+, NH4+, K+, Mg2+, Ca2+, Cl-, NO2-, NO3-, SO42- | default | -| XRF | 1h | .csv | Al, Si, P, S, Cl, K, Ca, Ti, V, Cr, Mn, Fe, Ni, Cu... | default | -| VOC | 1h | .csv | voc | default | -| EPA | 1h | .csv | all | default | -| Minion | 1h | .csv, .xlsx | Na+, NH4+, Cl-, NO3-, SO42-, Al, Ti, V, Cr, Mn, Fe | default | +### The AeroViz project currently supports data from the following instruments: + +| Instrument | Time Resolution | File Type | Display Columns | QAQC method | +|:------------------------------------------------------:|:---------------:|:------------|-------------------------------------------------------|:-----------:| +| NEPH (Nephelometer) | 5min | .dat | G | default | +| Aurora (Nephelometer) | 1min | .csv | G | default | +| SMPS (Scanning Mobility Particle Sizer) | 6min | .txt, .csv | all | default | +| GRIMM (GRIMM Aerosol Technik) | 6min | .dat | all | default | +| APS_3321 (Aerodynamic Particle Sizer) | 6min | .txt | all | default | +| AE33 (Aethalometer Model 33) | 1min | .dat | BC6 | default | +| AE43 (Aethalometer Model 43) | 1min | .dat | BC6 | default | +| BC1054 (Black Carbon Monitor 1054) | 1min | .csv | BC9 | default | +| MA350 (MicroAeth MA350) | 1min | .csv | BC5 | default | +| TEOM (Continuous Ambient Particulate Monitor) | 6min | .csv | PM_Total, PM_NV | default | +| OCEC (Sunset Organic Carbon Elemental Carbon Analyzer) | 1h | *LCRes.csv | Thermal_OC, Thermal_EC, Optical_OC, Optical_EC | default | +| IGAC (In-situ Gas and Aerosol Compositions monitor) | 1h | .csv | Na+, NH4+, K+, Mg2+, Ca2+, Cl-, NO2-, NO3-, SO42- | default | +| XRF (X-ray Fluorescence Spectrometer) | 1h | .csv | Al, Si, P, S, Cl, K, Ca, Ti, V, Cr, Mn, Fe, Ni, Cu... 
| default | | VOC (Volatile Organic Compounds Monitor) | 1h | .csv | voc | default | | EPA | 1h | .csv | all | default | | Minion | 1h | .csv, .xlsx | Na+, NH4+, Cl-, NO3-, SO42-, Al, Ti, V, Cr, Mn, Fe | default | ```{note} Notes: diff --git a/docs/guide/plot.md b/docs/guide/plot.md index 5f0c912..4cc2c64 100644 --- a/docs/guide/plot.md +++ b/docs/guide/plot.md @@ -11,10 +11,10 @@ df = DataBase() # build default data, users can use their own data # wind rose plot.meteorology.wind_rose(df, 'WS', 'WD', typ='bar') -plot.meteorology.wind_rose(df, 'WS', 'WD', 'PM25', typ='scatter') +plot.meteorology.wind_rose(df, 'WS', 'WD', 'PM2.5', typ='scatter') -plot.meteorology.CBPF(df, 'WS', 'WD', 'PM25') -plot.meteorology.CBPF(df, 'WS', 'WD', 'PM25', percentile=[75, 100]) +plot.meteorology.CBPF(df, 'WS', 'WD', 'PM2.5') +plot.meteorology.CBPF(df, 'WS', 'WD', 'PM2.5', percentile=[75, 100]) ``` ###
Linear Regression
 @@ -67,11 +67,11 @@ plot.timeseries.timeseries_template(df.loc['2021-02-01', '2021-03-31']) ```python from pathlib import Path from AeroViz import plot -from AeroViz.tools import DataBase, DataReader +from AeroViz.tools import DataBase df = DataBase() # build default data, users can use their own data -PNSD = DataReader(Path(__file__)/'AeroViz'/'data'/'DEFAULT_PNSD_DATA.csv') +PNSD = DataBase('DEFAULT_PNSD_DATA.csv') plot.distribution.distribution.heatmap(PNSD, unit='Number') plot.distribution.distribution.heatmap_tms(PNSD, unit='Number', freq='60d') ``` diff --git a/pyproject.toml b/pyproject.toml index de06328..a6e126b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,21 +4,22 @@ build-backend = "setuptools.build_meta" [project] name = "AeroViz" -version = "0.1.9.5" +version = "0.1.10" description = "Aerosol science" authors = [{ name = "alex", email = "alex870521@gmail.com" }] license = { text = "MIT" } readme = "README.md" -requires-python = ">=3.12" +requires-python = ">=3.11" classifiers = [ "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] dependencies = [ - "pandas==2.2.2", - "numpy==1.26.4", + "pandas>=2.2.0", + "numpy>=1.26.4", "matplotlib==3.8.4", "scipy==1.14.0", "seaborn==0.13.2", @@ -49,6 +50,22 @@ docs = [ "mkdocstrings[python]>=0.18.0", ] +[tool.pytest.ini_options] +pythonpath = "." +markers = [ + "requires_data: marks tests that require actual data files", +] + + +[tool.commitizen] +name = "cz_conventional_commits" +tag_format = "v$version" +changelog_file = "docs/CHANGELOG.md" +version_scheme = "pep440" +version_provider = "pep621" +update_changelog_on_bump = true +major_version_zero = true + [project.urls] Homepage = "https://github.com/Alex870521/AeroViz" Repository = "https://github.com/Alex870521/AeroViz" diff --git a/tests/test_RawDataReader.py b/tests/test_RawDataReader.py index d8abea8..36d4d23 100644 --- a/tests/test_RawDataReader.py +++ b/tests/test_RawDataReader.py @@ -2,9 +2,12 @@ from datetime import datetime from pathlib import Path +import pytest + from AeroViz import RawDataReader +@pytest.mark.requires_data class TestRawDataReader(unittest.TestCase): @classmethod diff --git a/tests/test_aeroviz_import.py b/tests/test_import.py similarity index 82% rename from tests/test_aeroviz_import.py rename to tests/test_import.py index c25cd75..40df69f 100644 --- a/tests/test_aeroviz_import.py +++ b/tests/test_import.py @@ -8,7 +8,7 @@ def test_imports(self): from AeroViz import plot from AeroViz.dataProcess import DataProcess from AeroViz.rawDataReader import RawDataReader - from AeroViz.tools import DataBase, DataReader, DataClassifier + from AeroViz.tools import DataBase, DataClassifier self.assertTrue(True) except ImportError as e: @@ -16,4 +16,4 @@ def test_imports(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()
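Taken together, the `**kwargs` forwarding added to the `RawDataReader` factory, the `size_range` default of `(11.8, 593.5)` set in `AbstractReader.__init__`, and the new SMPS size-range QC mask mean callers can now pin the expected bin window at read time instead of relying on the old hard-coded `11.8` check. A minimal usage sketch under those assumptions (the data directory and date window are placeholders; `size_range` can be omitted to fall back to the `AbstractReader` default):

```python
from datetime import datetime
from pathlib import Path

from AeroViz import RawDataReader

# Read SMPS data; size_range is forwarded through **kwargs to the reader,
# where bins outside the window are masked during QC and files whose first/last
# bins disagree with the window are logged rather than silently dropped.
data = RawDataReader(
    'SMPS',
    Path('/path/to/data'),       # placeholder data directory
    start=datetime(2024, 2, 1),
    end=datetime(2024, 3, 31),
    size_range=(11.8, 593.5),    # nm; matches the AbstractReader default
)
```

Keeping the default in one place (`AbstractReader.__init__`) rather than inside the SMPS reader is what lets the same QC path be reused by other sizers with a different bin window.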
@@ -67,11 +67,11 @@ plot.timeseries.timeseries_template(df.loc['2021-02-01', '2021-03-31']) ```python from pathlib import Path from AeroViz import plot -from AeroViz.tools import DataBase, DataReader +from AeroViz.tools import DataBase df = DataBase() # build default data, uers can use their own data -PNSD = DataReader(Path(__file__)/'AeroViz'/'data'/'DEFAULT_PNSD_DATA.csv') +PNSD = DataBase('DEFAULT_PNSD_DATA.csv') plot.distribution.distribution.heatmap(PNSD, unit='Number') plot.distribution.distribution.heatmap_tms(PNSD, unit='Number', freq='60d') diff --git a/pyproject.toml b/pyproject.toml index de06328..a6e126b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,21 +4,22 @@ build-backend = "setuptools.build_meta" [project] name = "AeroViz" -version = "0.1.9.5" +version = "0.1.10" description = "Aerosol science" authors = [{ name = "alex", email = "alex870521@gmail.com" }] license = { text = "MIT" } readme = "README.md" -requires-python = ">=3.12" +requires-python = ">=3.11" classifiers = [ "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] dependencies = [ - "pandas==2.2.2", - "numpy==1.26.4", + "pandas>=2.2.0", + "numpy>=1.26.4", "matplotlib==3.8.4", "scipy==1.14.0", "seaborn==0.13.2", @@ -49,6 +50,22 @@ docs = [ "mkdocstrings[python]>=0.18.0", ] +[tool.pytest.ini_options] +pythonpath = "." +markers = [ + "requires_data: marks tests that require actual data files", +] + + +[tool.commitizen] +name = "cz_conventional_commits" +tag_format = "v$version" +changelog_file = "docs/CHANGELOG.md" +version_scheme = "pep440" +version_provider = "pep621" +update_changelog_on_bump = true +major_version_zero = true + [project.urls] Homepage = "https://github.com/Alex870521/AeroViz" Repository = "https://github.com/Alex870521/AeroViz" diff --git a/tests/test_RawDataReader.py b/tests/test_RawDataReader.py index d8abea8..36d4d23 100644 --- a/tests/test_RawDataReader.py +++ b/tests/test_RawDataReader.py @@ -2,9 +2,12 @@ from datetime import datetime from pathlib import Path +import pytest + from AeroViz import RawDataReader +@pytest.mark.requires_data class TestRawDataReader(unittest.TestCase): @classmethod diff --git a/tests/test_aeroviz_import.py b/tests/test_import.py similarity index 82% rename from tests/test_aeroviz_import.py rename to tests/test_import.py index c25cd75..40df69f 100644 --- a/tests/test_aeroviz_import.py +++ b/tests/test_import.py @@ -8,7 +8,7 @@ def test_imports(self): from AeroViz import plot from AeroViz.dataProcess import DataProcess from AeroViz.rawDataReader import RawDataReader - from AeroViz.tools import DataBase, DataReader, DataClassifier + from AeroViz.tools import DataBase, DataClassifier self.assertTrue(True) except ImportError as e: @@ -16,4 +16,4 @@ def test_imports(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()