diff --git a/.github/workflows/CI_check_version.yml b/.github/workflows/CI_check_version.yml index b6f64f6..769131c 100644 --- a/.github/workflows/CI_check_version.yml +++ b/.github/workflows/CI_check_version.yml @@ -21,10 +21,10 @@ jobs: steps: - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/CI_docs_build_and_check.yml b/.github/workflows/CI_docs_build_and_check.yml index ae37ecf..e9ede0d 100644 --- a/.github/workflows/CI_docs_build_and_check.yml +++ b/.github/workflows/CI_docs_build_and_check.yml @@ -21,11 +21,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/CI_docs_build_and_publish.yml b/.github/workflows/CI_docs_build_and_publish.yml index e4fd15c..455c86d 100644 --- a/.github/workflows/CI_docs_build_and_publish.yml +++ b/.github/workflows/CI_docs_build_and_publish.yml @@ -28,11 +28,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Also check out the live docs, placing it into a pseudo "build" folder for the docs - name: Checkout live docs - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: MeteoSwiss/ampycloud ref: gh-pages @@ -51,7 +51,7 @@ jobs: # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/CI_pylinter.yml b/.github/workflows/CI_pylinter.yml index 9c0b2a5..1496586 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -23,16 +23,16 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} # Install any dependency we require - - name: Install dependancies + - name: Install dependencies run: | python -m pip install --upgrade pip shell: bash diff --git a/.github/workflows/CI_pypi.yml b/.github/workflows/CI_pypi.yml index fed29bc..d562031 100644 --- a/.github/workflows/CI_pypi.yml +++ b/.github/workflows/CI_pypi.yml @@ -19,10 +19,10 @@ jobs: steps: - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index 19a779f..2500482 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -22,15 +22,18 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ['3.9', '3.10', '3.11'] + exclude: + - os: macos-latest + python-version: '3.9' steps: # Checkout the repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Setup python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: 
actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -42,13 +45,11 @@ jobs: if: matrix.os == 'windows-latest' run: | Get-Command python | Select-Object -ExpandProperty Definition - echo $CONDA - name: Check the setup (II) if: matrix.os != 'windows-latest' run: | which python - which conda # Install all the dependencies we require - name: Install dependencies diff --git a/.github/workflows/CI_pytest_weekly.yml b/.github/workflows/CI_pytest_weekly.yml index 0ef622f..421d69d 100644 --- a/.github/workflows/CI_pytest_weekly.yml +++ b/.github/workflows/CI_pytest_weekly.yml @@ -10,6 +10,7 @@ name: CI_pytest_weekly on: + workflow_dispatch: # Allow triggering the weekly tests manually schedule: - cron: '43 21 * * 1,4' # Run at 21:43 UTC every Monday and Thursday @@ -22,18 +23,26 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - # Checkout the master branch from the repository + # Checkout the master branch if this is a scheduled test # The idea for this Action is to spot issues with new dependency versions as soon as they # are released (and not when we decide to update ampycloud). - - name: Checkout current repository - uses: actions/checkout@v3 + - name: Checkout current repository (master branch) + if: ${{ github.event_name == 'schedule' }} + uses: actions/checkout@v4 with: repository: MeteoSwiss/ampycloud ref: master + # Alternatively, checkout whichever branch was selected by the user upon the manual trigger. + - name: Checkout current repository (custom branch) + if: ${{ github.event_name == 'workflow_dispatch' }} + uses: actions/checkout@v4 + with: + repository: MeteoSwiss/ampycloud + # Setup python - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' diff --git a/.github/workflows/CI_speed_check.yml b/.github/workflows/CI_speed_check.yml index 2644294..46b7bbe 100644 --- a/.github/workflows/CI_speed_check.yml +++ b/.github/workflows/CI_speed_check.yml @@ -19,11 +19,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/CHANGELOG b/CHANGELOG index 1c1a1b7..1bc0137 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,15 +3,33 @@ All notable changes to ampycloud will be documented in this file. The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v2.0.0] +### Added: + - [fpavogt, 2024-06-26] Add ability to trigger weekly tests manually. + - [fpavogt, 2024-04-16] Improve input data consistency check (fix #116). + - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. + - [fpavogt, 2024-03-26] Add option to reset only a single parameter. +### Fixed: + - [srethore, 2024-04-23] Fix static type issues. + - [regDaniel, 2024-04-23] Fix deprecated GitHub Actions workflow file. + - [fpavogt, 2024-04-17] Fix #119. + - [regDaniel, 2024-04-09] Minor adaptations to better comply with PEP standards. + - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. +### Changed: + - [fpavogt, 2024-06-25] Add support for numpy >2.0. + - [fpavogt, 2024-04-28] Add a rogue point to the mock example. + - [fpavogt, 2024-03-25] Set the fluffiness boost to 2 (fixes #123).
+ - [fpavogt, 2024-03-23] Changed default mock dataset to be 900s long (instead of 2400s). + ## [v1.0.0] -### Added - - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step -### Fixed - - [regDaniel, 2023-11-09] Fix #107 (no more reporting of two layers at same height) -### Changed +### Added: + - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step. +### Fixed: + - [regDaniel, 2023-11-09] Fix #107 (no more reporting of two layers at same height). +### Changed: - [fpavogt, 2023-11-05] Updated version to 1.0.0 for first full release, incl. misc documentation changes. -- [regDaniel, 2023-11-09] Changed min sep in layering from layer mean to layer base altitude -- [regDaniel, 2023-11-10] Refactoring (modularization) of metarize method +- [regDaniel, 2023-11-09] Changed min sep in layering from layer mean to layer base altitude. +- [regDaniel, 2023-11-10] Refactoring (modularization) of metarize method. ## [v0.6.0.dev0] ### Added: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 00b3f55..1488b6f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,15 +2,15 @@ If you: -* :boom: want to **report a bug** with ampycloud: [jump here.](https://github.com/MeteoSwiss/ampycloud/issues) -* :question: have a **question** about ampycloud: [jump here instead.](https://github.com/MeteoSwiss/ampycloud/discussions) -* :construction_worker: want to **contribute** to ampycloud (:heart_eyes: :tada:): read on ! +* :boom: want to **report a bug** with ampycloud: [jump here.](https://github.com/MeteoSwiss/ampycloud/issues) +* :question: have a **question** about ampycloud: [jump here instead.](https://github.com/MeteoSwiss/ampycloud/discussions) +* :construction_worker: want to **contribute** to ampycloud, read on ! ## Table of contents - [Code of conduct](#code-of-conduct) -- [Scope of ampycloud](#scope-of-ampycloud) +- [Scope](#scope) - [Essential things to know about ampycloud for dev work](#essential-things-to-know-about-ampycloud-for-dev-work) - [Branching model](#branching-model) - [Installing from source](#installing-from-source) @@ -23,7 +23,7 @@ If you: - [Documentation](#documentation) - [Testing](#testing) - [Plotting](#plotting) - - [Release mechanisms](#release-mechanisms) + - [Release mechanism](#release-mechanism) - [Less-Essential things to know about ampycloud for dev work](#less-essential-things-to-know-about-ampycloud-for-dev-work) - [Updating the copyright years](#updating-the-copyright-years) @@ -31,10 +31,9 @@ If you: ## Code of conduct This project and everyone participating in it is governed by the [ampycloud Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. -Please report unacceptable behavior to [loris.foresti@meteoswiss.ch](mailto:loris.foresti@meteoswiss.ch) and/or [frederic.vogt@meteoswiss.ch](mailto:frederic.vogt@meteoswiss.ch). +Please report unacceptable behavior to [loris.foresti@meteoswiss.ch](mailto:loris.foresti@meteoswiss.ch). - -## Scope of ampycloud +## Scope Please be sure to read (and understand the implications) of the [scope of ampycloud](https://meteoswiss.github.io/ampycloud/index.html#scope-of-ampycloud). 
@@ -86,9 +85,11 @@ Automated CI/CD checks are triggered upon Pull Requests being issued towards the * automatic publication of the Sphinx docs (for a PR to `master` only) * check that the code version was incremented (for PR to `master` only) -To test the code with the latest Python developments, a `pytest-weeklz` workflow runs the +To test the latest release of the code with the latest Python developments, a `pytest-weekly` workflow runs the ampycloud tests twice a week using the latest version of Python and of the ampycloud dependencies. +:warning: This test is being run from the `master` branch. Pushing a bug fix to `develop` will not be sufficient to make it turn green - a new code release is necessary ! + There is another Github action responsible for publishing the code onto pypi, that gets triggered upon a new release or pre-release being published. See the ampycloud [release mechanisms](#release-mechanisms) for details. @@ -293,7 +294,7 @@ def some_plot_function(...): With this decorator, all functions will automatically deploy the effects associated to the value of `dynamic.AMPYCLOUD_PRMS['MPL_STYLE']` which can take one of the following values: `['base', 'latex', 'metsymb']`. -### Release mechanisms +### Release mechanism When changes merged in the `develop` branch are stable and deemed *worthy*, follow these steps to create a new release of ampycloud: @@ -325,13 +326,14 @@ create a new release of ampycloud: - on the [release page](https://github.com/MeteoSwiss/ampycloud/releases), - in the [README](https://github.com/MeteoSwiss/ampycloud/blob/develop/README.md) tags, - on [testpypi](https://test.pypi.org/project/ampycloud/) and [pypi](https://pypi.org/project/ampycloud/), - - on the [`gh-pages` branch](https://github.com/MeteoSwiss/ampycloud/tree/gh-pages), and - - in the [live documentation](https://MeteoSwiss.github.io/ampycloud). + - on the [`gh-pages` branch](https://github.com/MeteoSwiss/ampycloud/tree/gh-pages), + - in the [live documentation](https://MeteoSwiss.github.io/ampycloud), and + - on [Zenodo](https://zenodo.org/doi/10.5281/zenodo.8399683) (for which the connection to this repo is enabled from Zenodo itself, by the admins of the MeteoSwiss organization on Github). ## Less-Essential things to know about ampycloud for dev work ### Updating the copyright years -The ampycloud copyright years may need to be updated if the development goes on beyond 2022. If so, +The ampycloud copyright years may need to be updated if the development goes on beyond 2022 (which it already has 😉). If so, the copyright years will need to be manually updated in the following locations: * `docs/source/substitutions.rst` (the copyright tag) diff --git a/README.md b/README.md index cd33d24..26efe44 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ For the full documentation, installation instructions, etc ..., go to: https://m ### License & Copyright -ampycloud is released under the terms of **the 3-Clause BSD license**. The copyright (2021-2023) belongs to MeteoSwiss. +ampycloud is released under the terms of **the 3-Clause BSD license**. The copyright (2021-2024) belongs to MeteoSwiss. 
### Contributing to ampycloud diff --git a/docs/source/conf.py b/docs/source/conf.py index 276f9e8..e876ebd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,6 +13,7 @@ # from pathlib import Path +from typing import Any # Define file_absolute_path = Path(__file__).absolute() @@ -28,7 +29,7 @@ # -- Project information ----------------------------------------------------- project = 'ampycloud' -copyright = '2021-2023, MeteoSwiss' +copyright = '2021-2024, MeteoSwiss' author = 'Frédéric P.A. Vogt' version = vers @@ -63,7 +64,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns: list[Any] = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' diff --git a/docs/source/index.rst b/docs/source/index.rst index 4c68246..b6d7c06 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,10 +18,19 @@ Welcome to the ampycloud documentation -------------------------------------- * **What:** ampycloud refers to both a **Python package** and the **algorithm** at its core, designed - to characterize cloud layers (i.e. height and sky coverage fraction) using ceilometer measurements - (i.e. automatic cloud base *hits*), and derive a corresponding METAR-like message. + to characterize cloud layers (i.e. sky coverage fraction and base height) using ceilometer measurements + (i.e. cloud base *hits*), and derive a corresponding METAR-like message. A visual illustration of the algorithm is visible in :numref:`fig-demo`. + .. note:: + At the moment, ampycloud **cannot** use backscatter profiles to derive cloud base hits independently. + + .. note:: + ampycloud does not challenge the quality/nature of the cloud base hits that it is provided with. + It trusts them all fully and equally. The capacity of the algorithm to provide an accurate + assessment of cloud layers above an aerodrome is thus directly limited by the ability of + ceilometers to report clouds up to the aerodrome's Minimum Sector Altitude in the first place. + * **Where:** ampycloud lives in `a dedicated repository `_ under the `MeteoSwiss organization `_ on Github, where you can submit all your `questions `_ and diff --git a/docs/source/running.rst b/docs/source/running.rst index f0d941e..6846be9 100644 --- a/docs/source/running.rst +++ b/docs/source/running.rst @@ -19,7 +19,7 @@ A no-words example for those that want to get started quickly # Your data should have *exactly* this structure mock_data = mocker.canonical_demo_data() - # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft + # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft aal chunk = ampycloud.run(mock_data, prms={'MSA': 10000}, geoloc='Mock data', ref_dt=datetime.now()) @@ -36,8 +36,8 @@ The input data -------------- -The ampycloud algorithm is meant to process cloud base *hits* from ceilometer observations. A given -set of hits to be processed by the ampycloud package must be stored inside a +The ampycloud algorithm is meant to process cloud base *hits* derived from ceilometer observations. +A given set of hits to be processed by the ampycloud package must be stored inside a :py:class:`pandas.DataFrame` with a specific set of characteristics outlined below.
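For orientation, here is a minimal sketch of a conforming :py:class:`pandas.DataFrame` (the column names and dtypes follow the REQ_DATA_COLS update further down in this diff; the values themselves are invented): ::

    import numpy as np
    import pandas as pd

    # Three hits from two ceilometers: 'dt' in s (negative = past),
    # 'height' in ft aal. A NaN height with type 0 encodes a non-detection.
    mock_data = pd.DataFrame({
        'ceilo': pd.Series(['ceilo_1', 'ceilo_1', 'ceilo_2'], dtype=pd.StringDtype()),
        'dt': np.array([-600., -300., -450.]),
        'height': np.array([1200., 1250., np.nan]),
        'type': np.array([1, 1, 0]),
    })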
Users can use the following utility function to check whether a given :py:class:`pandas.DataFrame` meets all the requirements of ampycloud. diff --git a/docs/source/scope.rst b/docs/source/scope.rst index fe23e06..76e80c0 100644 --- a/docs/source/scope.rst +++ b/docs/source/scope.rst @@ -3,11 +3,21 @@ which requires all its dependencies to be robust and stable. This has the following implications for ampycloud: - * The scope of ampycloud will remain limited to the **automatic processing of ceilometer - hits**. In particular, ampycloud does not process Vertical Visibility (VV) measurements. - Depending on the ceilometer type, the user will need to decide how to treat VV hits *before* - passing them to ampycloud, e.g. by removing them or by converting them to cloud base - heights. + * The scope of ampycloud will remain limited to the **automatic processing of cloud base + hits derived using ceilometers**. Furthermore, ampycloud does not process + Vertical Visibility (VV) measurements. Depending on the ceilometer type, the user will need + to decide how to treat VV hits *before* passing them to ampycloud, e.g. by removing them or + by converting them to cloud base heights. + + * Note that regulation says that "if there are no clouds of operational significance + and no restriction on vertical visibility and the abbreviation 'CAVOK' is not + appropriate, the abbreviation 'NSC' should be used" (AMC1 MET.TR.205(e)(1)). + ampycloud cannot decide whether a 'CAVOK' is appropriate, and will therefore + always return 'NSC' if no clouds of operational significance are found. If no clouds + are detected at all by the ceilometers, ampycloud will return 'NCD'. Importantly, + users should bear in mind that ampycloud cannot handle CB and TCU cases, + such that any 'NCD'/'NSC' codes issued may need to be overwritten by the user in + certain situations. * ampycloud can evidently be used for R&D work, but the code itself should not be seen as an R&D platform. diff --git a/setup.py b/setup.py index a324922..a529d1d 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -35,9 +35,10 @@ 'Changelog': 'https://meteoswiss.github.io/ampycloud/changelog.html', 'Issues': 'https://github.com/MeteoSwiss/ampycloud/issues' }, - author="Frédéric P.A. Vogt", + author="Frédéric P.A. Vogt, Daniel Regenass", author_email="frederic.vogt@meteoswiss.ch", - description="Characterization of cloud layers from ceilometer measurements", + description="Determining the sky coverage fraction and base height of cloud layers using " + "ceilometer data", long_description=long_description, long_description_content_type="text/markdown", python_requires='>=3.9.0', @@ -52,7 +53,7 @@ "ruamel.yaml" ], extras_require={ - 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest'] + 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest', 'setuptools', 'mypy'] }, # Setup entry points to use ampycloud directly from a terminal entry_points={ @@ -63,7 +64,7 @@ # 3 - Alpha # 4 - Beta # 5 - Production/Stable - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', # Indicate who your project is intended for 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering :: Atmospheric Science', @@ -71,7 +72,10 @@ 'License :: OSI Approved :: BSD License', # Specify the Python versions you support here.
In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', ], # Let's make sure the parameter non-py files get included in the wheels on pypi. include_package_data=True diff --git a/src/ampycloud/cluster.py b/src/ampycloud/cluster.py index b847fc8..221e2fa 100644 --- a/src/ampycloud/cluster.py +++ b/src/ampycloud/cluster.py @@ -10,7 +10,7 @@ # Import from Python import logging -from typing import Union +from typing import Optional, Union import numpy as np from sklearn.cluster import AgglomerativeClustering @@ -23,7 +23,7 @@ @log_func_call(logger) -def agglomerative_cluster(data: np.ndarray, n_clusters: int = None, +def agglomerative_cluster(data: np.ndarray, n_clusters: Union[int, None] = None, metric: str = 'euclidean', linkage: str = 'single', distance_threshold: Union[int, float] = 1) -> tuple: """ Function that wraps arround :py:class:`sklearn.cluster.AgglomerativeClustering`. @@ -54,7 +54,7 @@ def agglomerative_cluster(data: np.ndarray, n_clusters: int = None, @log_func_call(logger) -def clusterize(data: np.ndarray, algo: str = None, **kwargs: dict) -> tuple: +def clusterize(data: np.ndarray, algo: Union[str, None] = None, **kwargs: dict) -> Optional[tuple]: """ Umbrella clustering routine, that provides a single access point to the different clustering algorithms. diff --git a/src/ampycloud/core.py b/src/ampycloud/core.py index 1e52de8..2d3b430 100644 --- a/src/ampycloud/core.py +++ b/src/ampycloud/core.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the BSD-3-Clause license. @@ -56,11 +56,11 @@ def copy_prm_file(save_loc: str = './', which: str = 'defaults') -> None: """ # Let's take a look at the path I was given - save_loc = Path(save_loc) + given_path = Path(save_loc) # I won't create stuff for the users. ampycloud is not that nice. - if not save_loc.exists(): + if not given_path.exists(): raise AmpycloudError('save_loc does not appear to exist !') - if not save_loc.is_dir(): + if not given_path.is_dir(): raise AmpycloudError('save_loc does not appear to be a directory !') # Next, let's look at all the parameter files available ... @@ -73,11 +73,11 @@ def copy_prm_file(save_loc: str = './', which: str = 'defaults') -> None: if (fname := f'ampycloud_{which}_prms.yml') not in ref_files: raise AmpycloudError(f'Parameter file {fname} not found.') - if (save_loc / fname).exists(): - raise AmpycloudError(f'File {fname} already exists at save_loc={save_loc}') + if (given_path / fname).exists(): + raise AmpycloudError(f'File {fname} already exists at save_loc={given_path}') # All looks good, let's copy the file - copy(ref_loc / fname, save_loc / fname) + copy(ref_loc / fname, given_path / fname) @log_func_call(logger) @@ -131,9 +131,13 @@ def set_prms(pth: Union[str, Path]) -> None: @log_func_call(logger) -def reset_prms() -> None: +def reset_prms(which: Union[str, list, None] = None) -> None: """ Reset the ampycloud dynamic=scientific parameters to their default values. + Args: + which (str|list, optional): (list of) names of parameters to reset specifically. + If not set (by default), all parameters will be reset. 
+ Example: :: @@ -148,12 +152,26 @@ def reset_prms() -> None: """ - dynamic.AMPYCLOUD_PRMS = dynamic.get_default_prms() + if which is None: + dynamic.AMPYCLOUD_PRMS = dynamic.get_default_prms() + return + + # Ok, reset the parameters one at a time + default_prms = dynamic.get_default_prms() + + # Clean up which + which = [which] if isinstance(which, str) else which + + for prm in which: + if prm not in default_prms.keys(): + raise AmpycloudError(f'Unknown parameter name: {prm}') + + dynamic.AMPYCLOUD_PRMS[prm] = default_prms[prm] @log_func_call(logger) -def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, - ref_dt: Union[str, datetime] = None) -> CeiloChunk: +def run(data: pd.DataFrame, prms: Union[dict, None] = None, geoloc: Union[str, None] = None, + ref_dt: Union[str, datetime, None] = None) -> CeiloChunk: """ Runs the ampycloud algorithm on a given dataset. Args: @@ -179,7 +197,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, .. important :: ampycloud treats Vertical Visibility hits no differently than any other hit. Hence, it is up - to the user to adjust the Vertical Visibility hit altitude (and/or ignore some of them, for + to the user to adjust the Vertical Visibility hit height (and/or ignore some of them, for example) prior to feeding them to ampycloud, so that it can be used as a cloud hit. .. important:: @@ -193,7 +211,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, All the scientific parameters of the algorithm are set dynamically in the :py:mod:`.dynamic` module. From within a Python session all these parameters can be changed directly. For example, - to change the Minimum Sector Altitude, one would do: + to change the Minimum Sector Altitude (to be specified in ft aal), one would do: :: from ampycloud import dynamic @@ -223,7 +241,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, The :py:class:`.data.CeiloChunk` instance returned by this function contains all the information associated to the ampycloud algorithm, inclduing the raw data and slicing/grouping/layering info. Its method :py:meth:`.data.CeiloChunk.metar_msg` provides direct access to the resulting - METAR-like message. Users that require the altitude, okta amount, and/or exact sky coverage + METAR-like message. Users that require the height, okta amount, and/or exact sky coverage fraction of layers can get them via the :py:attr:`.data.CeiloChunk.layers` class property. Example: @@ -239,7 +257,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, # Generate the canonical demo dataset for ampycloud mock_data = mocker.canonical_demo_data() - # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft + # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft aal. chunk = ampycloud.run(mock_data, prms={'MSA':10000}, geoloc='Mock data', ref_dt=datetime.now()) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 6633dfa..7c01f54 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
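The new ``which`` argument of ``reset_prms()`` implemented just above can be exercised as follows (a sketch, with 'MSA' as an arbitrary example parameter): ::

    from ampycloud import dynamic
    from ampycloud.core import reset_prms

    dynamic.AMPYCLOUD_PRMS['MSA'] = 5000  # tweak a single scientific parameter
    reset_prms(which='MSA')               # reset only that parameter ...
    reset_prms()                          # ... or reset everything at once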
@@ -14,7 +14,6 @@ import copy from abc import ABC, abstractmethod import numpy as np -import numpy.typing as npt import pandas as pd # Import from this package @@ -53,12 +52,13 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, @property def msa(self) -> float: - """ The Minimum Sector Altitude set when initializing this specific instance. """ + """ The Minimum Sector Altitude set when initializing this specific instance, in ft aal. """ return self.prms['MSA'] @property def msa_hit_buffer(self) -> float: - """ The Minimum Sector Altitude hit buffer set when initializing this specific instance. """ + """ The Minimum Sector Altitude hit buffer set when initializing this specific instance, + in ft. """ return self.prms['MSA_HIT_BUFFER'] @property @@ -93,16 +93,29 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # Begin with a thorough inspection of the dataset data = utils.check_data_consistency(data, req_cols=self.DATA_COLS) - # Then also drop any hits that is too high + # By default we set this flag to false and overwrite if enough hits are present + self._clouds_above_msa_buffer = False + + # Drop any hits that are too high and check if they exceed the threshold for 1 okta; + # if yes, set the flag clouds_above_msa_buffer to True if self.msa is not None: - hit_alt_lim = self.msa + self.msa_hit_buffer - logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) - # Type 1 or less hits above the cut threshold get turned to NaNs, to signal a - # non-detection below the MSA. Also change the hit type to 0 accordingly ! - data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'type'] = 0 - data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'alt'] = np.nan + hit_height_lim = self.msa + self.msa_hit_buffer + logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_height_lim)) + # First layer and vertical visibility (vervis) hits above the cut threshold get turned to NaNs, to signal a + # non-detection below the MSA. Also change the hit type to 0 accordingly in order + # to create a "no hit detected" in the range of interest (i.e. below MSA). + above_msa_t1_or_less = data[(data.height > hit_height_lim) & (data.type <= 1)].index + data.loc[above_msa_t1_or_less, 'type'] = 0 + data.loc[above_msa_t1_or_less, 'height'] = np.nan # Type 2 or more hits get cropped (there should be only 1 non-detection per time-stamp). + above_msa_t2_or_more = data[(data.height > hit_height_lim) & (data.type > 1)].index + data = data.drop(above_msa_t2_or_more) + if len(above_msa_t1_or_less) + len(above_msa_t2_or_more) > self._prms['MAX_HITS_OKTA0']: + logger.info( + "Hits above MSA + MSA_HIT_BUFFER exceeded threshold MAX_HITS_OKTA0. Will set " + "the flag 'clouds_above_msa_buffer' to indicate the presence of high clouds." + ) + self._clouds_above_msa_buffer = True return data @@ -150,7 +163,7 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, CeiloChunk.DATA_COLS, i.e. : :: - ['ceilo', 'dt', 'alt', 'type'] + ['ceilo', 'dt', 'height', 'type'] Specifically: @@ -163,8 +176,8 @@ hit time in s, **as float**. This should typically be a negative number (because METARs are assembled using *existing=past* ceilometer observations). - - ``alt``: cloud base hit altitude in ft, **as float**. The cloud base altitude computed - by the ceilometer. + - ``height``: cloud base hit height in ft above aerodrome level (aal), **as float**.
+ The cloud base height computed by the ceilometer. - ``type``: cloud hit type, **as int**. A value n>0 indicates that the hit is the n-th (from the ground) that was reported by this specific ceilometer for this specific @@ -186,16 +199,17 @@ self._layers = None @log_func_call(logger) - def data_rescaled(self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, - dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None) -> pd.DataFrame: + def data_rescaled(self, dt_mode: Optional[str] = None, height_mode: Optional[str] = None, + dt_kwargs: Optional[dict] = None, + height_kwargs: Optional[dict] = None) -> pd.DataFrame: """ Returns a copy of the data, rescaled according to the provided parameters. Args: dt_mode (str, optional): scaling rule for the time deltas. Defaults to None. - alt_mode (str, optional): scaling rule for the altitudes. Defaults to None. + height_mode (str, optional): scaling rule for the heights. Defaults to None. dt_kwargs (dict, optional): dict of arguments to be fed to the chosen dt scaling routine. Defaults to None. - alt_kwargs (dict, optinal): dict of arguments to be fed to the chosen alt scaling + height_kwargs (dict, optional): dict of arguments to be fed to the chosen height scaling routine. Defaults to None. Returns: @@ -210,8 +224,8 @@ # Deal with the default NoneType if dt_kwargs is None: dt_kwargs = {} - if alt_kwargs is None: - alt_kwargs = {} + if height_kwargs is None: + height_kwargs = {} # Make a deep copy of the data, to avoid messing it up out = copy.deepcopy(self.data) @@ -219,8 +233,8 @@ # Deal with the dt first out['dt'] = scaler.apply_scaling(out['dt'], dt_mode, **dt_kwargs) - # Then the altitudes - out['alt'] = scaler.apply_scaling(out['alt'], alt_mode, **alt_kwargs) + # Then the heights + out['height'] = scaler.apply_scaling(out['height'], height_mode, **height_kwargs) return out @@ -260,7 +274,7 @@ def max_hits_per_layer(self) -> int: def _calculate_base_height_for_selection( self, - data_indexer: pd.Series(dtype=bool), + data_indexer # type: pd.Series[bool] ) -> float: """Calculate the cloud base height for a selection of data. @@ -272,46 +286,44 @@ float: The base height of the given data selection. """ - # Start computing the base altitude + # Start computing the base height # First, compute which points should be considered in terms of lookback time - return utils.calc_base_alt( - self.data.sort_values('dt').loc[data_indexer]['alt'].values, + return utils.calc_base_height( + self.data.sort_values('dt').loc[data_indexer]['height'].values, self.prms['BASE_LVL_LOOKBACK_PERC'], - self.prms['BASE_LVL_ALT_PERC'], + self.prms['BASE_LVL_HEIGHT_PERC'], ) - - def _get_min_sep_for_altitude(self, altitude: float) -> float: - """Get the minimum separation for a given altitude. + def _get_min_sep_for_height(self, height: float) -> float: + """Get the minimum separation for a given height. Args: - altitude (float): The altitude for which we want to know the minimum - separation + height (float): The height for which we want to know the minimum + separation. Returns: - float: The minimum separation for the given altitude. + float: The minimum separation for the given height.
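To make the minimum-separation lookup concrete, a small sketch with invented limits and values (the real defaults live in the ampycloud parameter file): ::

    import numpy as np

    MIN_SEP_VALS = [250, 500, 1000]  # hypothetical minimum separations, in ft
    MIN_SEP_LIMS = [5000, 10000]     # hypothetical height limits, in ft
    # np.searchsorted picks the bin: 3000 ft -> 250 ft, 7000 ft -> 500 ft,
    # 12000 ft -> 1000 ft. Hence len(MIN_SEP_LIMS) == len(MIN_SEP_VALS) - 1.
    min_sep = MIN_SEP_VALS[np.searchsorted(MIN_SEP_LIMS, 7000.)]  # -> 500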
Raises: AmpycloudError: If the length of MIN_SEP_LIMS is not one less than MIN_SEP_VALS """ - if len(self.prms['MIN_SEP_LIMS']) != \ - len(self.prms['MIN_SEP_VALS']) - 1: - raise AmpycloudError( - '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' - 'Got MIN_SEP_LIMS %i and MIN_SEP_VALS %i', - (self.prms['MIN_SEP_LIMS'], self.prms['MIN_SEP_VALS']) - ) + if len(self.prms['MIN_SEP_LIMS']) != len(self.prms['MIN_SEP_VALS']) - 1: + raise AmpycloudError( + '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS". ' + f'Got MIN_SEP_LIMS {self.prms["MIN_SEP_LIMS"]} ' + f'and MIN_SEP_VALS {self.prms["MIN_SEP_VALS"]}.', + ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], - altitude) + height) min_sep = self.prms['MIN_SEP_VALS'][min_sep_val_id] - logger.info('Alt: %.1f', altitude) + logger.info('Height: %.1f', height) logger.info('min_sep value: %.1f', min_sep) return min_sep - def _get_cluster_ids(self, which: str) -> npt.ArrayLike: + def _get_cluster_ids(self, which: str) -> np.ndarray: """Get the original IDs of slices, groups or layers. Args: @@ -329,7 +341,7 @@ # of hits that do not get assigned to a sli/gro/lay. return np.delete(cids, np.where(cids == -1)) - def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt.ArrayLike]: + def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, np.ndarray]: """Setup a data frame for slices, groups or layers and keep track of IDs. Args: @@ -348,11 +360,11 @@ cols = ['n_hits', # Duplicate-corrected number of hits 'perc', # Duplicate-corrected hit percentage (in %) 'okta', # Corresponding okta value - 'alt_base', # Slice/Group/Layer base altitude - 'alt_mean', # Slice/Group/Layer mean altitude - 'alt_std', # Slice/Group/Layer altitude std - 'alt_min', # Slice/Group/Layer min altitude - 'alt_max', # Slice/Group/Layer max altitude + 'height_base', # Slice/Group/Layer base height + 'height_mean', # Slice/Group/Layer mean height + 'height_std', # Slice/Group/Layer height std + 'height_min', # Slice/Group/Layer min height + 'height_max', # Slice/Group/Layer max height 'thickness', # Slice/Group/Layer thickness 'fluffiness', # Slice/Group/Layer fluffiness 'code', # METAR code @@ -361,12 +373,11 @@ ] # We want to raise early if 'which' is unknown. - if not which in ['slices', 'groups', 'layers']: + if which not in ['slices', 'groups', 'layers']: raise AmpycloudError( - 'Trying to initialize a data frame for %s ' + f'Trying to initialize a data frame for {which}, ' 'which is unknown. Keyword arg "which" must be one of ' '"slices", "groups" or "layers"' - %which ) # If I am looking at the slices, also keep track of whether they are isolated, or not. @@ -411,7 +422,7 @@ return pdf, cluster_ids def _calculate_cloud_amount( - self, which: str, pdf: pd.DataFrame, cluster_ids: npt.ArrayLike + self, which: str, pdf: pd.DataFrame, cluster_ids: np.ndarray ) -> pd.DataFrame: """Calculate cloud amount for a given slice, group or layer. Args: @@ -427,7 +438,7 @@ Results are written to the "okta" column of the DF. """ - for ind, cid in enumerate(cluster_ids): + for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ?
in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the number of hits of this slice/group/layer for each ceilometer, @@ -462,7 +473,7 @@ self, which: str, pdf: pd.DataFrame, - cluster_ids: npt.ArrayLike + cluster_ids: np.ndarray ) -> pd.DataFrame: """Add statistical properties to slices/ groups/ layers . @@ -473,36 +484,35 @@ groups/ layers. Returns: - pd.DataFrame: with additional results in the columns alt_min, - alt_mean, alt_max, alt_std, thickness, fluffiness. + pd.DataFrame: with additional results in the columns height_min, + height_mean, height_max, height_std, thickness, fluffiness. """ for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? in_sligrolay = self.data[which[:-1]+'_id'] == cid - # Measure the mean altitude and associated std of the layer - pdf.iloc[ind, pdf.columns.get_loc('alt_mean')] = \ - self.data.loc[in_sligrolay, 'alt'].mean(skipna=True) - pdf.iloc[ind, pdf.columns.get_loc('alt_std')] = \ - self.data.loc[in_sligrolay, 'alt'].std(skipna=True) + # Measure the mean height and associated std of the layer + pdf.iloc[ind, pdf.columns.get_loc('height_mean')] = \ + self.data.loc[in_sligrolay, 'height'].mean(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_std')] = \ + self.data.loc[in_sligrolay, 'height'].std(skipna=True) # Let's also keep track of the min, max, thickness, and fluffiness values - pdf.iloc[ind, pdf.columns.get_loc('alt_min')] = \ - self.data.loc[in_sligrolay, 'alt'].min(skipna=True) - pdf.iloc[ind, pdf.columns.get_loc('alt_max')] = \ - self.data.loc[in_sligrolay, 'alt'].max(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_min')] = \ + self.data.loc[in_sligrolay, 'height'].min(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_max')] = \ + self.data.loc[in_sligrolay, 'height'].max(skipna=True) pdf.iloc[ind, pdf.columns.get_loc('thickness')] = \ - pdf.iloc[ind, pdf.columns.get_loc('alt_max')] - \ - pdf.iloc[ind, pdf.columns.get_loc('alt_min')] + pdf.iloc[ind, pdf.columns.get_loc('height_max')] - \ + pdf.iloc[ind, pdf.columns.get_loc('height_min')] pdf.iloc[ind, pdf.columns.get_loc('fluffiness')], _ = \ fluffer.get_fluffiness( - self.data.loc[in_sligrolay, ['dt', 'alt']].values, - boost=self.prms['GROUPING_PRMS']['fluffiness_boost'], + self.data.loc[in_sligrolay, ['dt', 'height']].values, **self.prms['LOWESS']) return pdf def _calculate_sligrolay_base_height( - self, which: str, pdf: pd.DataFrame, cluster_ids: npt.ArrayLike + self, which: str, pdf: pd.DataFrame, cluster_ids: np.ndarray ) -> pd.DataFrame: """Calculate base height for all slices/ groups/ layers. @@ -512,21 +522,19 @@ cluster_ids (npt.ArrayLike): Original IDs of slices/ groups/ layers. Returns: - pd.DataFrame: with calculatio results in columnm alt_base + pd.DataFrame: with calculation results in column height_base """ for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ?
in_sligrolay = self.data[which[:-1]+'_id'] == cid - # Compute the base altitude - pdf.iloc[ind, pdf.columns.get_loc('alt_base')] = self._calculate_base_height_for_selection( - in_sligrolay, - ) + # Compute the base height + pdf.iloc[ind, pdf.columns.get_loc('height_base')] = \ + self._calculate_base_height_for_selection(in_sligrolay) return pdf @log_func_call(logger) - def metarize( - self, which: str = 'slices') -> None: + def metarize(self, which: str = 'slices') -> None: """ Assembles a :py:class:`pandas.DataFrame` of slice/group/layer METAR properties of interest. @@ -543,13 +551,13 @@ def metarize( * ``n_hits (int)``: duplicate-corrected number of hits * ``perc (float)``: sky coverage percentage (between 0-100) * ``okta (int)``: okta count - * ``alt_base (float)``: base altitude - * ``alt_mean (float)``: mean altitude - * ``alt_std (float)``: altitude standard deviation - * ``alt_min (float)``: minimum altitude - * ``alt_max (float)``: maximum altitude + * ``height_base (float)``: base height + * ``height_mean (float)``: mean height + * ``height_std (float)``: height standard deviation + * ``height_min (float)``: minimum height + * ``height_max (float)``: maximum height * ``thickness (float)``: thickness - * ``fluffiness (float)``: fluffiness (expressed in altitude units, i.e. ft) + * ``fluffiness (float)``: fluffiness (expressed in height units, i.e. ft) * ``code (str)``: METAR-like code * ``significant (bool)``: whether the layer is significant according to the ICAO rules. See :py:func:`.icao.significant_cloud` for details. @@ -563,7 +571,7 @@ def metarize( same ceilometer* are counted as one only. In other words, if a Type ``1`` and ``2`` hits **from the same ceilometer, at the same observation time** are included in a given slice/group/layer, they are counted as one hit only. This is a direct consequence of the - fact that clouds have a single base altitude at any given time [*citation needed*]. + fact that clouds have a single base height at any given time [*citation needed*]. Note: The metarize function is modularized in private submethods defined above. @@ -576,7 +584,7 @@ def metarize( # calculate cloud amount in okta pdf = self._calculate_cloud_amount(which, pdf, cids) - # calculate slice/ group/ layer base altitude + # calculate slice/ group/ layer base height pdf = self._calculate_sligrolay_base_height(which, pdf, cids) # collect some more information, including fluffiness @@ -587,12 +595,13 @@ def metarize( pdf.iloc[ind, pdf.columns.get_loc('code')] = \ wmo.okta2code(pdf.iloc[ind, pdf.columns.get_loc('okta')]) + \ - wmo.alt2code(pdf.iloc[ind, pdf.columns.get_loc('alt_base')]) + wmo.height2code(pdf.iloc[ind, pdf.columns.get_loc('height_base')]) # Set the proper column types for cname in ['n_hits', 'okta', 'cluster_id']: pdf[cname] = pdf[cname].astype(int) - for cname in ['perc', 'alt_base', 'alt_mean', 'alt_std', 'alt_min', 'alt_max', 'thickness', + for cname in ['perc', 'height_base', 'height_mean', 'height_std', 'height_min', + 'height_max', 'thickness', 'fluffiness']: pdf[cname] = pdf[cname].astype(float) for cname in ['code']: @@ -605,9 +614,9 @@ def metarize( if which == 'groups': pdf['ncomp'] = pdf['ncomp'].astype(int) - # Sort the table as a function of the base altitude of the sli/gro/lay. + # Sort the table as a function of the base height of the sli/gro/lay. # This is why having the 'cluster_id' info is useful (so I remember which they are). 
- pdf.sort_values('alt_base', inplace=True) + pdf.sort_values('height_base', inplace=True) # Reset the index, 'cause I only need the one. pdf.reset_index(drop=True, inplace=True) @@ -620,7 +629,7 @@ def metarize( @log_func_call(logger) def find_slices(self) -> None: - """ Identify general altitude slices in the chunk data. Intended as the first stage towards + """ Identify general height slices in the chunk data. Intended as the first stage towards the identification of cloud layers. Important: @@ -631,12 +640,12 @@ def find_slices(self) -> None: # Get a scaled **copy** of the data to feed the clustering algorithm tmp = self.data_rescaled(dt_mode='shift-and-scale', dt_kwargs={'scale': self.prms['SLICING_PRMS']['dt_scale']}, - alt_mode=self.prms['SLICING_PRMS']['alt_scale_mode'], - alt_kwargs=self.prms['SLICING_PRMS']['alt_scale_kwargs'], + height_mode=self.prms['SLICING_PRMS']['height_scale_mode'], + height_kwargs=self.prms['SLICING_PRMS']['height_scale_kwargs'], ) # What are the valid points ? - valids = tmp['alt'].notna() + valids = tmp['height'].notna() # Add a column to the original data to keep track of the slice id. # First, set them all to -1 and force the correct dtype. I hate pandas for this ... @@ -649,12 +658,12 @@ def find_slices(self) -> None: elif len(valids[valids]) > 1: # ... run the clustering on them ... _, labels = cluster.clusterize( - tmp[['dt', 'alt']][valids].to_numpy(), algo='agglomerative', + tmp[['dt', 'height']][valids].to_numpy(), algo='agglomerative', **{'linkage': 'average', 'metric': 'manhattan', 'distance_threshold': self.prms['SLICING_PRMS']['distance_threshold']}) # ... and set the labels in the original data - self.data.loc[self.data['alt'].notna(), ['slice_id']] = labels + self.data.loc[self.data['height'].notna(), ['slice_id']] = labels else: # If I get no valid points, do nothing at all pass @@ -666,18 +675,18 @@ def find_slices(self) -> None: def _merge_close_groups(self) -> None: """Merge groups that are closer than the minimum separation at their - respective altitudes. + respective heights. Algorithm steps: - 1. Calculate preliminary group base altitude + 1. Calculate preliminary group base height 2. Start from bottom and search for the first two groups that are too close. - 3. Merge these groups and recalculate all base altitudes. + 3. Merge these groups and recalculate all base heights. 4. Repeat 2. and 3. until all groups are enough separated. The reason this is done iteratively is that there can be multiple overlaps. In these cases merging two groups often separates their new (combined) - base altitude far enough from the base altitude of the next layer above. + base height far enough from the base height of the next layer above. Doing the merging all at once would thus lead to unwanted overgrouping. Attention: This method only recalculates height, not amount! If you want @@ -685,24 +694,24 @@ def _merge_close_groups(self) -> None: _calculate_cloud_amount or the public metarize() method. """ - # Let's setup a groups df and get the groups base altitude. + # Let's setup a groups df and get the groups base height. 
# setup pd.DataFrame to store slices/ groups/ layers prelim_groups, cids = self._setup_sligrolay_pdf('groups') - # calculate bae altitude + # calculate base height prelim_groups = self._calculate_sligrolay_base_height( 'groups', prelim_groups, cids ) - # Ordering by altitude - prelim_groups.sort_values('alt_base', inplace=True) + # Ordering by height + prelim_groups.sort_values('height_base', inplace=True) prelim_groups.reset_index(drop=True, inplace=True) #self.metarize('groups') #prelim_groups = self.groups - min_seps_grp = prelim_groups['alt_base'].apply(self._get_min_sep_for_altitude) - base_alt_diffs = prelim_groups['alt_base'].diff() + min_seps_grp = prelim_groups['height_base'].apply(self._get_min_sep_for_height) + base_height_diffs = prelim_groups['height_base'].diff() # create a boolean series to select values, fillna is necessary as # first entry of diff will be nan - lt_min_sep_indexer = (base_alt_diffs < min_seps_grp).fillna(False) + lt_min_sep_indexer = (base_height_diffs < min_seps_grp).fillna(False) while len(prelim_groups[lt_min_sep_indexer]) > 0: # start with the two lowest layers that are too close idx = prelim_groups[lt_min_sep_indexer].index[0] @@ -716,16 +725,16 @@ prelim_groups.drop(index=idx, inplace=True) # resetting because we must not have index gaps in the next iteration prelim_groups.reset_index(drop=True, inplace=True) - # now we recalculate the base alt for the merged supergroup + # now we recalculate the base height for the merged supergroup data_idxer = self.data['group_id'] == prelim_groups['cluster_id'].iloc[idx - 1] prelim_groups.iloc[ - idx - 1, prelim_groups.columns.get_loc('alt_base') + idx - 1, prelim_groups.columns.get_loc('height_base') ] = self._calculate_base_height_for_selection(data_idxer) - # as this changes base alt, it is possible that we now are closer + # as this changes base height, it is possible that we now are closer # to another group, so we have to continue iteratively. - min_seps_grp = prelim_groups['alt_base'].apply(self._get_min_sep_for_altitude) - base_alt_diffs = prelim_groups['alt_base'].diff() - lt_min_sep_indexer = (base_alt_diffs < min_seps_grp).fillna(False) + min_seps_grp = prelim_groups['height_base'].apply(self._get_min_sep_for_height) + base_height_diffs = prelim_groups['height_base'].diff() + lt_min_sep_indexer = (base_height_diffs < min_seps_grp).fillna(False) @log_func_call(logger) def find_groups(self) -> None: @@ -761,15 +770,15 @@ # Let's get ready to measure the slice separation above and below with respect to the # other ones.
- alt_pad = self.prms['GROUPING_PRMS']['alt_pad_perc']/100 - m_lim = row['alt_min'] - alt_pad * row['thickness'] - p_lim = row['alt_max'] + alt_pad * row['thickness'] + height_pad = self.prms['GROUPING_PRMS']['height_pad_perc']/100 + m_lim = row['height_min'] - height_pad * row['thickness'] + p_lim = row['height_max'] + height_pad * row['thickness'] # For each other slice below and above, figure out if it is overlapping or not - seps_m = [m_lim < self.slices.iloc[item]['alt_max'] + - alt_pad * self.slices.iloc[item]['thickness'] for item in range(ind)] - seps_p = [p_lim > self.slices.iloc[item]['alt_min'] - - alt_pad * self.slices.iloc[item]['thickness'] + seps_m = [m_lim < self.slices.iloc[item]['height_max'] + + height_pad * self.slices.iloc[item]['thickness'] for item in range(ind)] + seps_p = [p_lim > self.slices.iloc[item]['height_min'] - + height_pad * self.slices.iloc[item]['thickness'] for item in range(ind+1, len(self.slices), 1)] # If the slice is isolated, I can stop here and move on ... @@ -806,29 +815,29 @@ def find_groups(self) -> None: valids = self.data['slice_id'].isin([self.slices.iloc[ind]['cluster_id'] for ind in grp]) - # Rescale these points in dt and alt prior to running the single-linkage clustering. + # Rescale these points in dt and height prior to running the single-linkage clustering. # dt scaling is provided by the user. - # alt scaling is derived from the smallest fluffiness of the slice bundle. - grp_alt_scale = self.slices.loc[grp, 'fluffiness'].min(skipna=True) - logger.debug('Bundle min. fluffiness: %.1f ft', grp_alt_scale) + # height scaling is derived from the smallest fluffiness of the slice bundle. + grp_height_scale = self.slices.loc[grp, 'fluffiness'].min(skipna=True) + logger.debug('Bundle min. fluffiness: %.1f ft', grp_height_scale) # ... and check against the allowed scaling range - grp_alt_scale = np.max([np.min(self.prms['GROUPING_PRMS']['alt_scale_range']), - grp_alt_scale]) - grp_alt_scale = np.min([np.max(self.prms['GROUPING_PRMS']['alt_scale_range']), - grp_alt_scale]) - logger.debug('Bundle alt. scale: %.1f ft', grp_alt_scale) + grp_height_scale = np.max([np.min(self.prms['GROUPING_PRMS']['height_scale_range']), + grp_height_scale]) + grp_height_scale = np.min([np.max(self.prms['GROUPING_PRMS']['height_scale_range']), + grp_height_scale]) + logger.debug('Bundle height scale: %.1f ft', grp_height_scale) # Ready to trigger the data rescaling tmp = self.data_rescaled(dt_mode='shift-and-scale', dt_kwargs={'scale': self.prms['GROUPING_PRMS']['dt_scale']}, - alt_mode='shift-and-scale', - alt_kwargs={'shift': 0, 'scale': grp_alt_scale}) + height_mode='shift-and-scale', + height_kwargs={'shift': 0, 'scale': grp_height_scale}) # What are the valid points ? - valids = tmp['alt'].notna() * valids + valids = tmp['height'].notna() * valids # Run the clustering nlabels, labels = cluster.clusterize( - tmp[['dt', 'alt']][valids].to_numpy(), algo='agglomerative', + tmp[['dt', 'height']][valids].to_numpy(), algo='agglomerative', **{'linkage': 'single', 'metric': 'euclidean', 'distance_threshold': 1}) # Based on the clustering, assign each element to a group. 
The group id is the slice_id @@ -844,7 +853,7 @@ def find_groups(self) -> None: # Now we need to separate layers if they are closer than the set # minimum separation self._merge_close_groups() - #Let's metarize the merged groups + # Let's metarize the merged groups self.metarize(which='groups') @log_func_call(logger) @@ -871,18 +880,17 @@ def find_layers(self) -> None: # Loop through every group, and look for sub-layers in it ... for ind in range(len(self.groups)): - # Let's extract the altitudes of all the hits in this group ... - gro_alts = self.data.loc[self.data.loc[:, 'group_id'] == - self._groups.at[ind, 'cluster_id'], - 'alt'].to_numpy() + # Let's extract the heights of all the hits in this group ... + gro_heights = self.data.loc[self.data.loc[:, 'group_id'] == + self._groups.at[ind, 'cluster_id'], 'height'].to_numpy() # Only look for multiple layers if it is worth it ... # 1) Layer density is large enough cond1 = self.groups.at[ind, 'okta'] < self.prms['LAYERING_PRMS']['min_okta_to_split'] # 2) I have more than 30 valid points (GMM is unstable below this amount). - cond2 = len(gro_alts[~np.isnan(gro_alts)]) < 30 - # 3) Not all the altitudes are the same - cond3 = len(np.unique(gro_alts[~np.isnan(gro_alts)])) == 1 + cond2 = len(gro_heights[~np.isnan(gro_heights)]) < 30 + # 3) Not all the heights are the same + cond3 = len(np.unique(gro_heights[~np.isnan(gro_heights)])) == 1 if cond1 or cond2 or cond3: # Add some useful info to the log logger.info( @@ -893,22 +901,22 @@ def find_layers(self) -> None: continue # Reshape the array in anticipation of the GMM routine ... - gro_alts = gro_alts.reshape(-1, 1) + gro_heights = gro_heights.reshape(-1, 1) - # Identify the minimum layer separation given the overall group base altitude - min_sep = self._get_min_sep_for_altitude(self.groups.at[ind, 'alt_base']) + # Identify the minimum layer separation given the overall group base height + min_sep = self._get_min_sep_for_height(self.groups.at[ind, 'height_base']) - # Handle #78: if the data is comprised of only two distinct altitudes, only look for + # Handle #78: if the data is comprised of only two distinct heights, only look for # up to 2 Gaussian components. Else, up to 3. - ncomp_max = np.min([len(np.unique(gro_alts[~np.isnan(gro_alts)])), 3]) + ncomp_max = np.min([len(np.unique(gro_heights[~np.isnan(gro_heights)])), 3]) logger.debug('Setting ncomp_max to: %i', ncomp_max) # And feed them to a Gaussian Mixture Model to figure out how many components it has ... ncomp, sub_layers_id, _ = layer.ncomp_from_gmm( - gro_alts, ncomp_max=ncomp_max, min_sep=min_sep, + gro_heights, ncomp_max=ncomp_max, min_sep=min_sep, layer_base_params={ 'lookback_perc': self.prms['BASE_LVL_LOOKBACK_PERC'], - 'alt_perc': self.prms['BASE_LVL_ALT_PERC'] + 'height_perc': self.prms['BASE_LVL_HEIGHT_PERC'] }, **self.prms['LAYERING_PRMS']['gmm_kwargs']) @@ -992,6 +1000,29 @@ def layers(self) -> pd.DataFrame: identified by the layering algorithm. """ return self._layers + @property + def clouds_above_msa_buffer(self) -> bool: + """ Returns whether a number of hits exceeding the threshold for 1 okta is detected above + MSA + MSA_HIT_BUFFER. + + Returns: + bool: whether high clouds were detected. + + """ + return self._clouds_above_msa_buffer + + def _ncd_or_nsc(self) -> str: + """ Return the METAR code for No Cloud Detected / No Significant Cloud. + Decision based on the attribute self._clouds_above_msa_buffer. 
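In condensed form, the reporting logic that ``_ncd_or_nsc()`` and the updated ``metar_msg()`` below implement (a reading aid; the layer codes are invented examples): ::

    # no slices/groups/layers found at all:
    #   -> 'NSC' if clouds_above_msa_buffer is set, else 'NCD'
    # significant layers found, but all based at or above the MSA:
    #   -> 'NSC'
    # otherwise:
    #   -> space-joined codes of the significant layers below the MSA,
    #      e.g. 'SCT009 BKN018'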
+ + Returns: + str: 'NCD' or 'NSC' + + """ + if self._clouds_above_msa_buffer: + return 'NSC' + return 'NCD' + def metar_msg(self, which: str = 'layers') -> str: """ Construct a METAR-like message for the identified cloud slices, groups, or layers. @@ -1007,15 +1038,16 @@ the resulting ``str`` ! See :py:func:`.icao.significant_cloud` for details. .. Caution:: - The Minimum Sector Altitude value set when the :py:class:`.CeiloChunk` instance **was - initialized** will be applied ! If in doubt, the value used by this method is that set - in the (parent) class attribute :py:attr:`.AbstractChunk.msa`. + The Minimum Sector Altitude values set when + the :py:class:`.CeiloChunk` instance **was initialized** will be applied ! If in doubt, + the values used by this method are those set in the (parent) class + attribute :py:attr:`.AbstractChunk.msa`. """ # Deal with the MSA: set it to infinity if None was specified if self.msa is None: - msa_val = np.infty + msa_val = np.inf else: msa_val = self.msa @@ -1025,17 +1057,21 @@ # Deal with the 0 layer situation if getattr(self, f'n_{which}') == 0: - return 'NCD' + return self._ncd_or_nsc() # Deal with the situation where layers have been found ... msg = sligrolay['code'] # What layers are significant *AND* below the MSA ? - report = sligrolay['significant']*(sligrolay['alt_base'] < msa_val) + report = sligrolay['significant']*(sligrolay['height_base'] < msa_val) msg = sligrolay['code'][report] msg = ' '.join(msg.to_list()) # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: - return 'NCD' + # first check if any significant clouds are in the interval [MSA, MSA+MSA_HIT_BUFFER] + sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['height_base'] >= msa_val) + if sligrolay_in_buffer.any(): + return 'NSC' # and return an NSC as it implies that the cloud is above the MSA + return self._ncd_or_nsc() # else, check for CBH above MSA + MSA_HIT_BUFFER return msg diff --git a/src/ampycloud/fluffer.py b/src/ampycloud/fluffer.py index 0b5ea41..37e5e32 100644 --- a/src/ampycloud/fluffer.py +++ b/src/ampycloud/fluffer.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -23,20 +23,24 @@ @log_func_call(logger) -def get_fluffiness(pts, boost=2, **kwargs): +def get_fluffiness(pts, **kwargs): """ Utility functions to compute the fluffiness of a set of ceilometer hits. Args: - pts (ndarray): 2D array of [dt, alt] ceilometer hits. None must have NaNs altitudes. - boost (float): the fluffiness boost factor. Defaults to 2. + pts (ndarray): 2D array of [dt, height] ceilometer hits. None of the hits may have NaN heights. **kwargs (optional): additional arguments to be fed to statsmodels.nonparameteric.lowess(). Returns: - float, ndarray: the fluffiness (in alt units) and LOWESS-smoothed (dt, alt) values (sorted). + float, ndarray: the fluffiness (in height units) and LOWESS-smoothed (dt, height) values + (sorted). - The *fluffiness* is computed as `boost * mean(abs(y - lowess))`, where lowess is the smooth + The *fluffiness* is computed as `2 * mean(abs(y - lowess))`, where lowess is the smooth LOWESS fit to the ceilometer hits.
+ The factor 2 stems from the fact that abs(y-lowess) corresponds to half(-ish) the slice + thickness, which needs to be doubled in order to use the fluffiness to rescale the slice onto the + 0 - 1 range. + To avoid LOWESS warnings, hits with identical x coordinates (= time steps) are being offset by a small factor (1e-5). """ @@ -68,5 +72,5 @@ def get_fluffiness(pts, boost=2, **kwargs): lowess_pts = sm.nonparametric.lowess(pts[:, 1], unique_xs, return_sorted=True, is_sorted=True, missing='none', **kwargs) - fluffiness = boost * np.mean(np.abs(pts[:, 1] - lowess_pts[:, 1])) + fluffiness = 2 * np.mean(np.abs(pts[:, 1] - lowess_pts[:, 1])) return fluffiness, lowess_pts diff --git a/src/ampycloud/hardcoded.py b/src/ampycloud/hardcoded.py index 4c4f5f1..2d652ad 100644 --- a/src/ampycloud/hardcoded.py +++ b/src/ampycloud/hardcoded.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2022-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -11,4 +11,4 @@ from pandas import StringDtype #: dict: the columns & associated types required for the pandas DataFrame fed to ampycloud. -REQ_DATA_COLS = {'ceilo': StringDtype(), 'dt': float, 'alt': float, 'type': int} +REQ_DATA_COLS = {'ceilo': StringDtype(), 'dt': float, 'height': float, 'type': int} diff --git a/src/ampycloud/icao.py b/src/ampycloud/icao.py index c13b9db..37498e8 100644 --- a/src/ampycloud/icao.py +++ b/src/ampycloud/icao.py @@ -44,7 +44,7 @@ def significant_cloud(oktas: list) -> list: """ sig_level = 0 - sig = [] + sig: list[bool] = [] for okta in oktas: # There can be no more than 3 significant cloud layers. # See footnote 14 in the ICAO doc ! diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index 7791629..8f4e99a 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -142,9 +142,9 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', def ncomp_from_gmm(vals: np.ndarray, ncomp_max: int = 3, min_sep: Union[int, float] = 0, - layer_base_params: dict[str, int] = {'lookback_perc': 100, 'alt_perc': 5}, + layer_base_params: Union[dict[str, int], None] = None, scores: str = 'BIC', - rescale_0_to_x: float = None, + rescale_0_to_x: Union[float, None] = None, random_seed: int = 42, **kwargs: dict) -> tuple: """ Runs a Gaussian Mixture Model on 1-D data, to determine if it contains 1, 2, or 3 @@ -160,6 +160,7 @@ def ncomp_from_gmm(vals: np.ndarray, first decide how many components look "best", at which point these may get merged depending on min_sep. I.e. min_sep does not lead to re-running the GMM, it only merges the identified layers if required. + layer_base_params (dict, optional): parameters used to compute the layer base heights, + i.e. 'lookback_perc' and 'height_perc'. Defaults to None, which amounts to + {'lookback_perc': 100, 'height_perc': 5}. scores (str, optional): either 'BIC' or 'AIC', to use Bayesian Information Criterion or Akaike Information Criterion scores. rescale_0_to_x (float, optional): if set, vals will be rescaled between 0 and this value @@ -180,6 +181,9 @@ def ncomp_from_gmm(vals: np.ndarray, ``_ """ + if layer_base_params is None: + layer_base_params = {'lookback_perc': 100, 'height_perc': 5} + # If I get a 1-D array, deal with it. if np.ndim(vals) == 1: vals = vals.reshape(-1, 1) @@ -218,9 +222,9 @@ def ncomp_from_gmm(vals: np.ndarray, models = {} # Run the Gaussian Mixture fit for all cases ...
should we do anything more fancy here ? - with utils.tmp_seed(random_seed): - for n_val in ncomp: - models[n_val] = GaussianMixture(n_val, covariance_type='spherical').fit(vals) + for n_val in ncomp: + models[n_val] = GaussianMixture(n_val, covariance_type='spherical', + random_state=random_seed).fit(vals) # Extract the AICS and BICS scores if scores == 'AIC': @@ -230,26 +234,36 @@ else: raise AmpycloudError(f'Unknown scores: {scores}') + # Fix #119: at times, not all sub-layers may be populated by GaussianMixture. + # To avoid problems down the line, we shall boost the abics score of such cases to make sure + # they are NOT taken as the best model + for (n_id, n_val) in enumerate(ncomp): + if (n_eff := len(np.unique(models[n_val].predict(vals)))) < n_val: + logger.warning(' %i out of %i sub-layers populated by GaussianMixture(%i) - see #119. ' + 'Ruling out %i components as a possibility.', + n_eff, n_val, n_val, n_val) + abics[n_id] = max(abics) + 1 # The larger the abics score, the worse the fit. + # Get the interesting information out best_model_ind = best_gmm(abics, **kwargs) best_ncomp = ncomp[best_model_ind] best_ids = models[ncomp[best_model_ind]].predict(vals) - logger.debug('%s scores: %s', scores, abics) - logger.debug('best_model_ind (raw): %i', best_model_ind) - logger.debug('best_ncomp (raw): %i', best_ncomp) + logger.debug(' %s scores: %s', scores, abics) + logger.debug(' best_model_ind (raw): %i', best_model_ind) + logger.debug(' best_ncomp (raw): %i', best_ncomp) # If I found only one component, I can stop here if best_ncomp == 1: return best_ncomp, best_ids, abics # If I found more than one component, let's make sure that they are sufficiently far apart. - # First, let's compute the component base altitude + # First, let's compute the component base height base_comp_heights = [ - utils.calc_base_alt( + utils.calc_base_height( vals_orig[best_ids == i].flatten(), layer_base_params['lookback_perc'], - layer_base_params['alt_perc'] + layer_base_params['height_perc'] ) for i in range(ncomp[best_model_ind]) ] diff --git a/src/ampycloud/plots/core.py b/src/ampycloud/plots/core.py index 5ba0f3c..8d6a407 100644 --- a/src/ampycloud/plots/core.py +++ b/src/ampycloud/plots/core.py @@ -24,10 +24,13 @@ @set_mplstyle @log_func_call(logger) -def diagnostic(chunk: CeiloChunk, upto: str = 'layers', show_ceilos: bool = False, - ref_metar: str = None, ref_metar_origin: str = None, +def diagnostic(chunk: CeiloChunk, upto: str = 'layers', + show_ceilos: bool = False, + ref_metar: Union[str, None] = None, + ref_metar_origin: Union[str, None] = None, show: bool = True, - save_stem: str = None, save_fmts: Union[list, str] = None) -> None: + save_stem: Union[str, None] = None, + save_fmts: Union[list, str, None] = None) -> None: """ A function to create the ampycloud diagnostic plot all the way to the layering step (included). This is the ultimate ampycloud plot that shows it all (or not - you choose !). diff --git a/src/ampycloud/plots/diagnostics.py b/src/ampycloud/plots/diagnostics.py index 6e65492..a6c99eb 100644 --- a/src/ampycloud/plots/diagnostics.py +++ b/src/ampycloud/plots/diagnostics.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License.
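The effect of the #119 guard added to ncomp_from_gmm() above can be reproduced in isolation. A standalone sketch with made-up values, where best_gmm()'s more refined selection is simplified to a plain BIC argmin::

    import numpy as np
    from sklearn.mixture import GaussianMixture

    rng = np.random.default_rng(42)
    # Two well-separated, made-up height populations
    vals = np.concatenate([rng.normal(1000, 50, 40),
                           rng.normal(2500, 50, 40)]).reshape(-1, 1)

    ncomp = [1, 2, 3]
    models = {n: GaussianMixture(n, covariance_type='spherical',
                                 random_state=42).fit(vals) for n in ncomp}
    abics = np.array([models[n].bic(vals) for n in ncomp])

    # The gist of the guard: any model that leaves a component unpopulated gets
    # its score pushed past the current maximum, so it can never be selected.
    for n_id, n_val in enumerate(ncomp):
        if len(np.unique(models[n_val].predict(vals))) < n_val:
            abics[n_id] = max(abics) + 1

    best_ncomp = ncomp[int(np.argmin(abics))]  # -> 2 for this bimodal toy dataset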
@@ -12,11 +12,11 @@ import logging from functools import partial from copy import deepcopy +from typing import Union import numpy as np import matplotlib.pyplot as plt -from matplotlib import gridspec +from matplotlib import gridspec, rcParams from matplotlib.lines import Line2D -from matplotlib import rcParams # Import from this package from .. import dynamic, scaler, fluffer @@ -109,8 +109,8 @@ def show_hits_only(self, show_ceilos: bool = False) -> None: # Get a list of ceilo colors from the cycler. ceilo_clrs = plt.rcParams['axes.prop_cycle'].by_key()['color'] # Assign them to each hit - symb_clrs = [ceilo_clrs[self._chunk.ceilos.index(item) % len(ceilo_clrs)] - for item in self._chunk.data['ceilo']] + symb_clrs = np.array([ceilo_clrs[self._chunk.ceilos.index(item) % len(ceilo_clrs)] + for item in self._chunk.data['ceilo']]) # What are the VV hits ? is_vv = np.array(self._chunk.data['type'] == -1) @@ -123,7 +123,7 @@ def show_hits_only(self, show_ceilos: bool = False) -> None: self._axs[0].clear() # Add the points - self._axs[0].scatter(self._chunk.data['dt'], self._chunk.data['alt'], + self._axs[0].scatter(self._chunk.data['dt'], self._chunk.data['height'], marker='o', s=10, edgecolor=symb_clrs, facecolor=list(fcs)) @@ -161,7 +161,7 @@ def show_slices(self) -> None: # First plot anything that is not assigned to a slice self._axs[0].scatter(self._chunk.data[self._chunk.data['slice_id'] == -1]['dt'], - self._chunk.data[self._chunk.data['slice_id'] == -1]['alt'], + self._chunk.data[self._chunk.data['slice_id'] == -1]['height'], marker='s', s=15, facecolor=fcs[self._chunk.data['slice_id'] == -1], edgecolor='k') @@ -174,71 +174,72 @@ def show_slices(self) -> None: all_clrs = plt.rcParams['axes.prop_cycle'].by_key()['color'] # Then loop through each slice - for ind in range(self._chunk.n_slices): - - # Which hits are in the slice ? - in_slice = np.array(self._chunk.data['slice_id'] == - self._chunk.slices.at[ind, 'cluster_id']) - - # Create an array of facecolors ... choose them from my set of colors - base_clr = all_clrs[ind % len(all_clrs)] - fcs = np.array([base_clr] * len(self._chunk.data)) - fcs[is_vv] = 'none' - - # I can finally show the points ... - self._axs[0].scatter(self._chunk.data.loc[in_slice, 'dt'], - self._chunk.data.loc[in_slice, 'alt'], - marker='o', s=10, c=fcs[in_slice], edgecolor=base_clr) - - # ... and the corresponding LOWESS-fit used to derive their fluffiness - _, lowess_pts = fluffer.get_fluffiness( - self._chunk.data.loc[in_slice, ['dt', 'alt']].values, **self._chunk.prms['LOWESS']) - self._axs[0].plot(lowess_pts[:, 0], lowess_pts[:, 1], - ls='-', lw=1.5, c=base_clr, drawstyle='steps-mid', zorder=0) - - # Let's also plot the overlap area of the slice - slice_min = self._chunk.slices.loc[ind, 'alt_min'] - slice_max = self._chunk.slices.loc[ind, 'alt_max'] - thickness = self._chunk.slices.loc[ind, 'thickness'] - alt_pad = self._chunk.prms['GROUPING_PRMS']['alt_pad_perc']/100 - - # Get some fake data spanning the entire data range - misc = np.linspace(self._chunk.data['dt'].min(skipna=True), - self._chunk.data['dt'].max(skipna=True), 3) - self._axs[0].fill_between(misc, - np.ones_like(misc) * (slice_min - alt_pad * thickness), - np.ones_like(misc) * (slice_max + alt_pad * thickness), - edgecolor='none', alpha=0.1, zorder=0, - facecolor=base_clr) - - # Stop here if that slice has 0 okta. - if self._chunk.slices.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the slices. - # First, check if it is isolated or not, and significant or not. 
- if self._chunk.slices.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - if self._chunk.slices.iloc[ind]['isolated'] is False: - warn = r' $\Bumpeq$' - else: - warn = '' - - # Show the slice METAR text, plus the fluffiness, plus the isolation status - msg = r'\smaller ' - msg += wmo.okta2symb( - self._chunk.slices.iloc[ind]['okta'], - use_metsymb = dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - msg += ' ' + self._chunk.slices.iloc[ind]['code'] + \ - rf' $f$:{self._chunk.slices.loc[ind, "fluffiness"]:.0f} ft' - msg += warn - self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'alt_base'], - texify(msg), - va='center', ha='center', color=base_clr, - bbox={'facecolor': 'none', 'edgecolor': base_clr, - 'alpha': alpha, 'ls': '--'}) + if self._chunk.n_slices is not None: + for ind in range(self._chunk.n_slices): + + # Which hits are in the slice ? + in_slice = np.array(self._chunk.data['slice_id'] == + self._chunk.slices.at[ind, 'cluster_id']) + + # Create an array of facecolors ... choose them from my set of colors + base_clr = all_clrs[ind % len(all_clrs)] + fcs = np.array([base_clr] * len(self._chunk.data)) + fcs[is_vv] = 'none' + + # I can finally show the points ... + self._axs[0].scatter(self._chunk.data.loc[in_slice, 'dt'], + self._chunk.data.loc[in_slice, 'height'], + marker='o', s=10, c=fcs[in_slice], edgecolor=base_clr) + + # ... and the corresponding LOWESS-fit used to derive their fluffiness + _, lowess_pts = fluffer.get_fluffiness( + self._chunk.data.loc[in_slice, ['dt', 'height']].values, **self._chunk.prms['LOWESS']) + self._axs[0].plot(lowess_pts[:, 0], lowess_pts[:, 1], + ls='-', lw=1.5, c=base_clr, drawstyle='steps-mid', zorder=0) + + # Let's also plot the overlap area of the slice + slice_min = self._chunk.slices.loc[ind, 'height_min'] + slice_max = self._chunk.slices.loc[ind, 'height_max'] + thickness = self._chunk.slices.loc[ind, 'thickness'] + height_pad = self._chunk.prms['GROUPING_PRMS']['height_pad_perc']/100 + + # Get some fake data spanning the entire data range + misc = np.linspace(self._chunk.data['dt'].min(skipna=True), + self._chunk.data['dt'].max(skipna=True), 3) + self._axs[0].fill_between(misc, + np.ones_like(misc) * (slice_min - height_pad * thickness), + np.ones_like(misc) * (slice_max + height_pad * thickness), + edgecolor='none', alpha=0.1, zorder=0, + facecolor=base_clr) + + # Stop here if that slice has 0 okta. + if self._chunk.slices.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the slices. + # First, check if it is isolated or not, and significant or not. + if self._chunk.slices.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + if self._chunk.slices.iloc[ind]['isolated'] is False: + warn = r' $\Bumpeq$' + else: + warn = '' + + # Show the slice METAR text, plus the fluffiness, plus the isolation status + msg = r'\smaller ' + msg += wmo.okta2symb( + self._chunk.slices.iloc[ind]['okta'], + use_metsymb=dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + msg += ' ' + self._chunk.slices.iloc[ind]['code'] + \ + rf' $f$:{self._chunk.slices.loc[ind, "fluffiness"]:.0f} ft' + msg += warn + self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'height_base'], + texify(msg), + va='center', ha='center', color=base_clr, + bbox={'facecolor': 'none', 'edgecolor': base_clr, + 'alpha': alpha, 'ls': '--'}) def show_groups(self, show_points: bool = False) -> None: """ Show the group data. 
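(As an aside, the padded overlap band drawn by fill_between() in show_slices() above boils down to simple arithmetic. A sketch with made-up numbers, assuming the slice thickness is simply its height range, and using the default GROUPING_PRMS height_pad_perc of +10)::

    height_min, height_max = 4800.0, 5200.0   # made-up slice extrema, in ft aal
    thickness = height_max - height_min       # 400 ft
    height_pad = 10 / 100                     # height_pad_perc / 100

    lower = height_min - height_pad * thickness   # 4760 ft
    upper = height_max + height_pad * thickness   # 5240 ft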
@@ -256,44 +257,45 @@ def show_groups(self, show_points: bool = False) -> None: transform=self._axs[2].transAxes) # Loop through each identified group - for ind in range(self._chunk.n_groups): - - if show_points: - # Which hits are in the group ? - in_group = np.array(self._chunk.data['group_id'] == - self._chunk.groups.at[ind, 'cluster_id']) - - # I can finally show the points ... - self._axs[0].scatter(self._chunk.data[in_group]['dt'], - self._chunk.data[in_group]['alt'], - marker=MRKS[ind % len(MRKS)], - s=40, c='none', edgecolor='gray', lw=1, zorder=10, alpha=0.5) - - # Stop here if that group has 0 okta. - if self._chunk.groups.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the groups. - # First, check if it is significant or not. - if self._chunk.groups.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - - # Then also check if these groups contain multiple sub-layers ... - symbs = {-1: r'', 1: r'$-$', 2: r'$=$', 3: r'$\equiv$'} - warn = ' ' + symbs[self._chunk.groups.iloc[ind]['ncomp']] - - # Show the group METAR text - msg = r'\smaller ' + wmo.okta2symb( - self._chunk.groups.iloc[ind]['okta'], - use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - ) + ' ' + self._chunk.groups.iloc[ind]['code'] + warn - self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['alt_base'], - texify(msg), - va='center', ha='center', color='gray', - bbox={'facecolor': 'none', 'edgecolor': 'gray', - 'alpha': alpha, 'ls': '--'}) + if self._chunk.n_groups is not None: + for ind in range(self._chunk.n_groups): + + if show_points: + # Which hits are in the group ? + in_group = np.array(self._chunk.data['group_id'] == + self._chunk.groups.at[ind, 'cluster_id']) + + # I can finally show the points ... + self._axs[0].scatter(self._chunk.data[in_group]['dt'], + self._chunk.data[in_group]['height'], + marker=MRKS[ind % len(MRKS)], + s=40, c='none', edgecolor='gray', lw=1, zorder=10, alpha=0.5) + + # Stop here if that group has 0 okta. + if self._chunk.groups.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the groups. + # First, check if it is significant or not. + if self._chunk.groups.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + + # Then also check if these groups contain multiple sub-layers ... + symbs = {-1: r'', 1: r'$-$', 2: r'$=$', 3: r'$\equiv$'} + warn = ' ' + symbs[self._chunk.groups.iloc[ind]['ncomp']] + + # Show the group METAR text + msg = r'\smaller ' + wmo.okta2symb( + self._chunk.groups.iloc[ind]['okta'], + use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + ) + ' ' + self._chunk.groups.iloc[ind]['code'] + warn + self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['height_base'], + texify(msg), + va='center', ha='center', color='gray', + bbox={'facecolor': 'none', 'edgecolor': 'gray', + 'alpha': alpha, 'ls': '--'}) def show_layers(self) -> None: """ Show the layer data. """ @@ -304,47 +306,48 @@ def show_layers(self) -> None: transform=self._axs[3].transAxes) # Start looping through every layer ... - for ind in range(self._chunk.n_layers): - - # Which hits are in the layer? - in_layer = np.array(self._chunk.data['layer_id'] == - self._chunk.layers.at[ind, 'cluster_id']) - - # I can finally show the points ... 
- self._axs[0].scatter(self._chunk.data[in_layer]['dt'], - self._chunk.data[in_layer]['alt'], - marker=MRKS[ind % len(MRKS)], - s=40, c='none', edgecolor='k', lw=1, zorder=10, alpha=0.5) - - # Draw the line of the layer base - if self._chunk.layers.iloc[ind]['okta'] == 0: - lls = ':' - else: - lls = '--' - - self._axs[0].axhline(self._chunk.layers.iloc[ind]['alt_base'], xmax=1, c='k', - lw=1, zorder=0, ls=lls, clip_on=False) - - # Stop here for empty layers - if self._chunk.layers.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the layer. - # First, check if it is significant, or not. - if self._chunk.layers.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - - # Display the actual METAR text - msg = r'\smaller ' + wmo.okta2symb( - self._chunk.layers.iloc[ind]['okta'], - use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - ) + ' ' + self._chunk.layers.iloc[ind]['code'] - self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['alt_base'], - texify(msg), - va='center', ha='center', color='k', - bbox={'facecolor': 'none', 'edgecolor': 'k', 'alpha': alpha}) + if self._chunk.n_layers is not None: + for ind in range(self._chunk.n_layers): + + # Which hits are in the layer? + in_layer = np.array(self._chunk.data['layer_id'] == + self._chunk.layers.at[ind, 'cluster_id']) + + # I can finally show the points ... + self._axs[0].scatter(self._chunk.data[in_layer]['dt'], + self._chunk.data[in_layer]['height'], + marker=MRKS[ind % len(MRKS)], + s=40, c='none', edgecolor='k', lw=1, zorder=10, alpha=0.5) + + # Draw the line of the layer base + if self._chunk.layers.iloc[ind]['okta'] == 0: + lls = ':' + else: + lls = '--' + + self._axs[0].axhline(self._chunk.layers.iloc[ind]['height_base'], xmax=1, c='k', + lw=1, zorder=0, ls=lls, clip_on=False) + + # Stop here for empty layers + if self._chunk.layers.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the layer. + # First, check if it is significant, or not. + if self._chunk.layers.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + + # Display the actual METAR text + msg = r'\smaller ' + wmo.okta2symb( + self._chunk.layers.iloc[ind]['okta'], + use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + ) + ' ' + self._chunk.layers.iloc[ind]['code'] + self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['height_base'], + texify(msg), + va='center', ha='center', color='k', + bbox={'facecolor': 'none', 'edgecolor': 'k', 'alpha': alpha}) def add_vv_legend(self) -> None: """ Adds a legend about the VV hits.""" @@ -385,7 +388,7 @@ def add_geoloc_and_ref_dt(self) -> None: self._axs[2].text(0.5, -0.02, texify(r'\smaller ' + '\n '.join(msg)), transform=self._axs[2].transAxes, ha='center', va='top') - def add_ref_metar(self, name: str, metar: str) -> None: + def add_ref_metar(self, name: Union[str, None], metar: Union[str, None]) -> None: """ Display a reference METAR, for example from human observers, different code, etc ... Args: @@ -410,7 +413,7 @@ def add_metar(self) -> None: msg = r'\smaller \bf ampycloud: ' + self._chunk.metar_msg() if self._chunk.msa is not None: - msg += '\n' + rf'\smaller\smaller MSA: {self._chunk.msa} ft' + msg += '\n' + rf'\smaller\smaller MSA: {self._chunk.msa} ft aal' # Show the msg ... self._axs[2].text(0.5, 1.25, texify(msg), @@ -424,7 +427,7 @@ def format_primary_axes(self) -> None: # Add the axes labels self._axs[0].set_xlabel(r'$\Delta t$ [s]', labelpad=10) - self._axs[0].set_ylabel(r'Alt. 
[ft]', labelpad=10) + self._axs[0].set_ylabel(r'Height [ft aal]', labelpad=10) def format_slice_axes(self) -> None: """ Format the duplicate axes related to the slicing part. @@ -432,7 +435,7 @@ def format_slice_axes(self) -> None: """ # Here, only proceed if I have actually found some slices ! - if self._chunk.n_slices > 0: + if self._chunk.n_slices is not None and self._chunk.n_slices > 0: # In order to show secondary_axis, we need to feed the forward/reverse scaling function # with the actual ones used in the code. Since these are dependant upon the data, @@ -442,10 +445,10 @@ def format_slice_axes(self) -> None: 'shift-and-scale', {'scale': self._chunk.prms['SLICING_PRMS']['dt_scale']}) - (alt_scale_kwargs, alt_descale_kwargs) = \ - get_scaling_kwargs(self._chunk.data['alt'].values, - self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - self._chunk.prms['SLICING_PRMS']['alt_scale_kwargs']) + (height_scale_kwargs, height_descale_kwargs) = \ + get_scaling_kwargs(self._chunk.data['height'].values, + self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + self._chunk.prms['SLICING_PRMS']['height_scale_kwargs']) # Then add the secondary axis, using partial function to define the back-and-forth # conversion functions. @@ -458,15 +461,15 @@ def format_slice_axes(self) -> None: secax_y = self._axs[0].secondary_yaxis( 1.03, functions=(partial(scaler.apply_scaling, - fct=self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - **alt_scale_kwargs), + fct=self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + **height_scale_kwargs), partial(scaler.apply_scaling, - fct=self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - **alt_descale_kwargs))) + fct=self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + **height_descale_kwargs))) # Add the axis labels secax_x.set_xlabel(texify(r'\smaller Slicing $\Delta t$')) - secax_y.set_ylabel(texify(r'\smaller Slicing Alt.')) + secax_y.set_ylabel(texify(r'\smaller Slicing height')) # And reduce the fontsize while we're at it ... secax_x.tick_params(axis='x', which='both', labelsize=rcParams['font.size']-2) @@ -475,12 +478,12 @@ def format_slice_axes(self) -> None: def format_group_axes(self) -> None: """ Format the duplicate axes related to the grouping part. - TODO: add secondary axis for the altitude rescaling as well. See #91. + TODO: add secondary axis for the height rescaling as well. See #91. """ # Only proceed if I have found some clusters ... - if self._chunk.n_groups > 0: + if self._chunk.n_groups is not None and self._chunk.n_groups > 0: # In order to show secondary_axis, we need to feed the forward/reverse scaling function # with the actual ones used in the code. Since these are dependant upon the data, @@ -504,7 +507,7 @@ def format_group_axes(self) -> None: # And reduce the fontsize while we're at it ... secax_x.tick_params(axis='x', which='both', labelsize=rcParams['font.size']-2) - def save(self, fn_out: str, fmts: list = None) -> None: + def save(self, fn_out: str, fmts: Union[list, None] = None) -> None: """ Saves the plot to file. Args: diff --git a/src/ampycloud/plots/secondary.py b/src/ampycloud/plots/secondary.py index 8325863..309b533 100644 --- a/src/ampycloud/plots/secondary.py +++ b/src/ampycloud/plots/secondary.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
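The secondary axes wired up in format_slice_axes() above rely on each scaling function being invertible via its mode keyword. A standalone round-trip sketch with made-up heights, assuming the usual (vals - min_val) / (max_val - min_val) convention for minmax_scale::

    import numpy as np
    from ampycloud.scaler import minmax_scale

    heights = np.array([800., 1500., 3200., 9600.])
    mn, mx = heights.min(), heights.max()

    scaled = minmax_scale(heights, min_val=mn, max_val=mx, mode='do')   # onto [0, 1]
    back = minmax_scale(scaled, min_val=mn, max_val=mx, mode='undo')    # back to ft
    assert np.allclose(back, heights)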
@@ -28,7 +28,7 @@ @set_mplstyle @log_func_call(logger) def scaling_fcts(show: bool = True, - save_stem: str = None, save_fmts: Union[list, str] = None) -> None: + save_stem: Union[str, None] = None, save_fmts: Union[list, str, None] = None) -> None: """ Plots the different scaling functions. Args: @@ -38,7 +38,7 @@ def scaling_fcts(show: bool = True, save_fmts (list|str, optional): a list of file formats to export the plot to. Defaults to None = ['png']. - This is a small utility routine to rapidly see the different altitude scaling options used by + This is a small utility routine to rapidly see the different height scaling options used by ampycloud. For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS. @@ -65,17 +65,17 @@ def scaling_fcts(show: bool = True, ax3 = fig.add_subplot(fig_gs[0, 2]) # Plot the different scaling laws - alts = np.arange(0, 25000, 10) + heights = np.arange(0, 25000, 10) # Plot the slicing scale - ax1.plot(alts, apply_scaling(alts, fct='shift-and-scale', **{'scale': 1000}), c='k', lw=2) + ax1.plot(heights, apply_scaling(heights, fct='shift-and-scale', **{'scale': 1000}), c='k', lw=2) ax1.set_title(texify(r'\smaller shift-and-scale')) - ax2.plot(alts, apply_scaling(alts, fct='minmax-scale'), c='k', lw=2) + ax2.plot(heights, apply_scaling(heights, fct='minmax-scale'), c='k', lw=2) ax2.set_title(texify(r'\smaller minmax-scale')) - ax3.plot(alts, apply_scaling( - alts, fct='step-scale', + ax3.plot(heights, apply_scaling( + heights, fct='step-scale', **{'steps': [3000, 8000], 'scales': [100, 500, 1000]}), c='k', lw=2) ax3.set_title(texify(r'\smaller step-scale')) diff --git a/src/ampycloud/plots/tools.py b/src/ampycloud/plots/tools.py index d7b37de..3b9ad52 100644 --- a/src/ampycloud/plots/tools.py +++ b/src/ampycloud/plots/tools.py @@ -135,9 +135,9 @@ def texify(msg: str) -> str: msg = msg.replace('%', r'\%') # Here, I want to clean the underscore, but ONLY outside of math mode. - msg = [item.replace('_', r'\_') if ind % 2 == 0 else item + msg_splitted = [item.replace('_', r'\_') if ind % 2 == 0 else item for (ind, item) in enumerate(msg.split('$'))] - msg = '$'.join(msg) + msg = '$'.join(msg_splitted) # Next cleanup any LaTeX-specific stuff ... else: msg = msg.replace(r'\smaller', '') diff --git a/src/ampycloud/prms/ampycloud_default_prms.yml b/src/ampycloud/prms/ampycloud_default_prms.yml index 90d8c00..87aae3e 100644 --- a/src/ampycloud/prms/ampycloud_default_prms.yml +++ b/src/ampycloud/prms/ampycloud_default_prms.yml @@ -13,8 +13,8 @@ # Plotting style. Can be one of 'base', 'latex', 'metsymb' MPL_STYLE: 'base' -# Minimum Sector Altitude, in ft. No cloud layers with a base above this value will be reported -# in the ampycloud METAR messages. Set it to null for no limit. +# Minimum Sector Altitude, in ft above aerodrome level. No cloud layers with a base above this value +# will be reported in the ampycloud METAR messages. Set it to null for no limit. MSA: null # Additional distance above the MSA value (in ft) beyond which hits will be ignored (=cropped) by @@ -29,32 +29,32 @@ MAX_HITS_OKTA0: 3 MAX_HOLES_OKTA8: 1 # Number (in %) of the cloud hit heights (sorted from smallest to highest) to ignore when -# computing the cloud altitude base, taken to be the min altitude of the +# computing the cloud base height, taken to be the min height of the # 100-LAYER_BASE_LVL_PERC % remaining points. 
-BASE_LVL_ALT_PERC: 5 +BASE_LVL_HEIGHT_PERC: 5 # Number of Slice/Group/Layer points (in %) - starting from the most recent ones - -# to use to compute the cloud base altitude. 30 would use only the 30% youngest hits. +# to use to compute the cloud base height. 30 would use only the 30% youngest hits. # Set to 100 to use all the points. BASE_LVL_LOOKBACK_PERC: 100 -# LOWESS parameters used to fit Slice/Groups/Layers altitude trends +# LOWESS parameters used to fit Slice/Groups/Layers height trends LOWESS: # Fraction of the slice/group/layer points to consider when deriving the LOWESS smooth fit. # Values too small will result in overfitting. Values too large will miss rapid - # altitude fluctuations. + # height fluctuations. frac: 0.35 # Number of LOWESS fitting iterations it: 3 -# Minimum separation (in ft) between the base altitudes of identified layers inside groups -# with a base altitude in the bins set by min_sep_lims. Any layers separated by less than this +# Minimum separation (in ft) between the base heights of identified layers inside groups +# with a base height in the bins set by min_sep_lims. Any layers separated by less than this # value will be remerged into one layer. Any value < than 5 times the vertical resolution of the data will # raise a warning, as it may lead to an over-layering of groups. For the ampycloud METAR # messages to be consistent with the ICAO rules of layer selection, this should be >=100 ft # below 10'000 ft. MIN_SEP_VALS: [250, 1000] -# Altitude threshold (in ft) separating the min_sep_val elements. +# Height threshold (in ft) separating the min_sep_val elements. # The 0 and + infty extrema will be automatically added to the list by ampycloud. # Just list here the intermediate steps. MIN_SEP_LIMS: [10000] @@ -65,24 +65,22 @@ SLICING_PRMS: distance_threshold: 0.2 # Time scaling factor dt_scale: 100000 - # Altitude scaling mode - alt_scale_mode: minmax-scale - # Altitude scaling parameters (in ft) - alt_scale_kwargs: + # Height scaling mode + height_scale_mode: minmax-scale + # Height scaling parameters (in ft) + height_scale_kwargs: min_range: 1000 # Clustering parameters GROUPING_PRMS: - # Padding (in % of the slice altitude range) to add above and below slices + # Padding (in % of the slice height range) to add above and below slices # before looking for overlaps - alt_pad_perc: +10 + height_pad_perc: +10 # Time scaling factor (in s) dt_scale: 180 - # Boost factor, applied to the slice fluffiness to derive the altitude scaling factor - fluffiness_boost: 2 - # Range of allowed altitude scaling factor (in ft). The fluffiness_boost will never be allowed - # to push the alt_scaling beyond this range. - alt_scale_range: [100, 500] + # Range of allowed height scaling factor (in ft). The fluffiness will never be allowed + # to push the height_scaling beyond this range. + height_scale_range: [100, 500] # Layering parameters LAYERING_PRMS: diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index ee115d5..3cdf27a 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -22,7 +22,7 @@ @log_func_call(logger) -def shift_and_scale(vals: np.ndarray, shift: Union[int, float] = None, +def shift_and_scale(vals: np.ndarray, shift: Union[int, float, None] = None, scale: Union[int, float] = 1, mode: str = 'do') -> np.ndarray: """ Shift (by a constant) and scale (by a constant) the data. 
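A standalone sketch of the do/undo round trip this provides, with made-up time deltas (assuming the 'do' convention (vals - shift) / scale; when shift is not provided, convert_kwargs() below derives it from the data)::

    import numpy as np
    from ampycloud.scaler import shift_and_scale

    dts = np.array([-1200., -600., 0.])

    scaled = shift_and_scale(dts, shift=0, scale=100000, mode='do')   # (vals - 0) / 1e5
    back = shift_and_scale(scaled, shift=0, scale=100000, mode='undo')
    assert np.allclose(back, dts)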
@@ -58,8 +58,8 @@ def shift_and_scale(vals: np.ndarray, shift: Union[int, float] = None, @log_func_call(logger) def minmax_scale(vals: np.ndarray, - min_val: Union[float, int] = None, - max_val: Union[float, int] = None, + min_val: Union[float, int, None] = None, + max_val: Union[float, int, None] = None, mode: str = 'do') -> np.ndarray: """ Rescale the data onto a [0, 1] interval, possibly forcing a specific and/or minimum interval range. @@ -154,11 +154,11 @@ def step_scale(vals: np.ndarray, offsets = [0] + steps # Get the bin edges for the scale mode - edges_in = [-np.infty] + steps + [np.infty] + edges_in = [-np.inf] + steps + [np.inf] # Idem for the descale mode ... this is more complex because of the continuity requirement edges_out = [steps[0]/scales[0] + np.sum((np.diff(steps)/scales[1:-1])[:ind]) for ind in range(len(steps))] - edges_out = [-np.infty] + edges_out + [np.infty] + edges_out = [-np.inf] + edges_out + [np.inf] # Prepare the output out = np.full_like(vals, np.nan, dtype=float) @@ -195,7 +195,7 @@ def step_scale(vals: np.ndarray, @log_func_call(logger) -def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: +def convert_kwargs(vals: np.ndarray, fct: str, **kwargs) -> dict: """ Converts the user-input keywords such that they can be fed to the underlying scaling functions. @@ -221,10 +221,10 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'shift-and-scale': # In this case, the only data I may need to derive from the data is the shift. - if 'shift' in kwargs.keys(): + if 'shift' in kwargs: # Already set - do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': kwargs['shift'] = np.nanmax(vals) elif kwargs['mode'] == 'undo': @@ -240,12 +240,12 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'minmax-scale': # In this case, the challenge lies with identifying min_val and max_val, knowing that the # user may specify a min_range value. - if 'min_val' in kwargs.keys() and 'max_val' in kwargs.keys(): + if 'min_val' in kwargs and 'max_val' in kwargs: # Already specified ... do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: @@ -260,7 +260,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: raise AmpycloudError(f"Mode unknown: {kwargs['mode']}") # 'mode' not set -> will default to 'do' - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: @@ -276,7 +276,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: @log_func_call(logger) -def apply_scaling(vals: np.ndarray, fct: str = None, **kwargs: dict) -> np.ndarray: +def apply_scaling(vals: np.ndarray, fct: Union[str, None] = None, **kwargs) -> np.ndarray: """ Umbrella scaling routine, that gathers all the individual ones under a single entry point. Args: diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 7617aa5..10f0ba1 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
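The continuity requirement behind edges_out in step_scale() above can be checked numerically. A standalone sketch with the step/scale values used elsewhere in the documentation, assuming the same 'do'/'undo' mode convention as the other scaling functions::

    import numpy as np
    from ampycloud.scaler import step_scale

    heights = np.array([0., 2999., 3000., 7999., 8000., 18000.])
    kwargs = {'steps': [3000, 8000], 'scales': [100, 500, 1000]}

    # Heights below 3000 ft are compressed by 100, those between 3000 and
    # 8000 ft by 500, and those above 8000 ft by 1000, with offsets chosen
    # so that the piecewise mapping stays continuous at the step edges.
    scaled = step_scale(heights, mode='do', **kwargs)
    back = step_scale(scaled, mode='undo', **kwargs)
    assert np.allclose(back, heights)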
@@ -13,6 +13,7 @@ from typing import Union import numpy as np import pandas as pd +from pandas import DataFrame # import from ampycloud from ..logger import log_func_call @@ -25,37 +26,38 @@ @log_func_call(logger) -def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) -> pd.DataFrame: - """ Generates a mock, flat, Gaussian cloud layer around a given altitude. +def flat_layer(dts: np.ndarray, height: float, height_std: float, + sky_cov_frac: float) -> DataFrame: + """ Generates a mock, flat, Gaussian cloud layer around a given height. Args: dts (np.array of float): time deltas, in s, for the simulated ceilometer hits. - alt (float): layer mean altitude, in ft. - alt_std (float): layer altitude standard deviation, in ft. + height (float): layer mean height, in ft above aerodrome level (aal). + height_std (float): layer height standard deviation, in ft. sky_cov_frac (float): Sky coverage fraction. Random hits will be set to NaN to reach this value. Must be 0 <= x <= 1. Returns: - :py:class:`pandas.DataFrame`: the simulated layer with columns ['dt', 'alt']. + :py:class:`pandas.DataFrame`: the simulated layer with columns ['dt', 'height']. """ # How many points do I need to generate ? n_pts = len(dts) # Create the storage structure - out = pd.DataFrame(columns=['dt', 'alt'], dtype=float) + out = DataFrame(columns=['dt', 'height'], dtype=float) - # Generate the random altitude data - out['alt'] = np.random.normal(loc=alt, scale=alt_std, size=n_pts) - # Cleanup any negative altitudes, if warranted. - out.loc[out['alt'] <= 0, 'alt'] = np.nan + # Generate the random height data + out['height'] = np.random.normal(loc=height, scale=height_std, size=n_pts) + # Cleanup any negative heights, if warranted. + out.loc[out['height'] <= 0, 'height'] = np.nan out['dt'] = dts # Empty hits to get the requested sky coverage fraction # First extract the hits I want to keep ... to_keep = out.sample(frac=sky_cov_frac) - # ... then get rid of the alt values everywhere ... - out.loc[:, 'alt'] = np.nan + # ... then get rid of the height values everywhere ... + out.loc[:, 'height'] = np.nan # ... and re-set the values I choose to keep. out.loc[to_keep.index] = to_keep @@ -63,34 +65,34 @@ def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) - @log_func_call(logger) -def sin_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float, - period: Union[int, float], amplitude: Union[int, float]) -> pd.DataFrame: +def sin_layer(dts: np.ndarray, height: float, height_std: float, sky_cov_frac: float, + period: Union[int, float], amplitude: Union[int, float]) -> DataFrame: """ Generates a sinusoidal cloud layer. Args: dts (np.array of float): time deltas, in s, for the simulated ceilometer hits. - alt (float): layer mean altitude, in ft. - alt_std (float): layer altitude standard deviation, in ft. + height (float): layer mean height, in ft above aerodrome level (aal). + height_std (float): layer height standard deviation, in ft. sky_cov_frac (float, optional): Sky coverage fraction. Random hits will be set to NaN to reach this value. Must be 0 <= x <= 1. period (int|float): period of the sine-wave, in s. amplitude (int|float): amplitude of the sine-wave, in ft. Returns: - :py:class:`pandas.DataFrame`: the simulated layer with columns ['alt', 'dt']. + :py:class:`pandas.DataFrame`: the simulated layer with columns ['height', 'dt']. 
""" # First, get a flat layer - out = flat_layer(dts, alt, alt_std, sky_cov_frac) + out: DataFrame = flat_layer(dts, height, height_std, sky_cov_frac) # And add to it a sinusoidal fluctuations. Note that nan should stay nan. - out.loc[:, 'alt'] = out.loc[:, 'alt'] + np.sin(-np.pi/2 + out['dt']/period*2*np.pi) * amplitude + out.loc[:, 'height'] = out.loc[:, 'height'] + \ + np.sin(-np.pi/2 + out['dt']/period*2*np.pi) * amplitude return out -def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, - layer_prms: list) -> pd.DataFrame: +def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, layer_prms: list) -> DataFrame: """ Generate a mock set of cloud layers for a specified number of ceilometers. Args: @@ -103,13 +105,13 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, from ``lookback_time`` and ``hit_gap``): :: - {'alt':1000, 'alt_std': 100, 'sky_cov_frac': 1, + {'height':1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0} Returns: :py:class:`pandas.DataFrame`: a pandas DataFrame with the mock data, ready to be fed to - ampycloud. Columns ['ceilo', 'dt', 'alt', 'type'] correspond to 1) ceilo names, 2) time - deltas in s, 3) hit altitudes in ft, and 4) hit type. + ampycloud. Columns ['ceilo', 'dt', 'height', 'type'] correspond to 1) ceilo names, 2) time + deltas in s, 3) hit heights in ft aal, and 4) hit type. TODO: - add the possibility to set some VV hits in the mix @@ -124,10 +126,10 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, if not isinstance(item, dict): raise AmpycloudError(f'Element {ind} from layer_prms should be a dict,' + f' not: {type(item)}') - if not all(key in item.keys() for key in ['alt', 'alt_std', 'sky_cov_frac', + if not all(key in item.keys() for key in ['height', 'height_std', 'sky_cov_frac', 'period', 'amplitude']): raise AmpycloudError('One or more of the following dict keys are missing in ' + - f"layer_prms[{ind}]: 'alt', 'alt_std', 'sky_cov_frac'," + + f"layer_prms[{ind}]: 'height', 'height_std', 'sky_cov_frac'," + "'period', 'amplitude'.") # Let's create the layers individually for each ceilometer @@ -139,42 +141,42 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, dts = np.random.random(n_pts) * -lookback_time # Let's now loop through each cloud layer and generate them - layers = [sin_layer(dts=dts, **prms) for prms in layer_prms] + layers: list[DataFrame] = [sin_layer(dts=dts, **prms) for prms in layer_prms] # Merge them all into one DataFrame ... - layers = pd.concat(layers).reset_index(drop=True) + merged_layers: DataFrame = pd.concat(layers).reset_index(drop=True) # Add the type column while I'm at it. Set it to None for now. - layers['type'] = None + merged_layers['type'] = None # Here, adjust the types so that it ranks lowest to highest for every dt step. # This needs to be done on a point by point basis, given that layers can cross each other. 
- for dt in np.unique(layers['dt']): - # Get the hit altitudes, and sort them from lowest to highest - alts = layers[layers['dt'] == dt]['alt'].sort_values(axis=0) + for dt in np.unique(merged_layers['dt']): + # Get the hit heights, and sort them from lowest to highest + heights = merged_layers[merged_layers['dt'] == dt]['height'].sort_values(axis=0) # Then deal with the other ones - for (a, alt) in enumerate(alts): + for (a, height) in enumerate(heights): # Except for the first one, any NaN hit gets dropped - if a > 0 and np.isnan(alt): - layers.drop(index=alts.index[a], + if a > 0 and np.isnan(height): + merged_layers.drop(index=heights.index[a], inplace=True) - elif np.isnan(alt): + elif np.isnan(height): # A non-detection should be type 0 - layers.loc[alts.index[a], 'type'] = 0 + merged_layers.loc[heights.index[a], 'type'] = 0 else: - layers.loc[alts.index[a], 'type'] = a+1 + merged_layers.loc[heights.index[a], 'type'] = a+1 # Add the ceilo info as an int - layers['ceilo'] = str(ceilo) + merged_layers['ceilo'] = str(ceilo) # And store this for later - ceilos += [layers] + ceilos += [merged_layers] # Merge it all - out = pd.concat(ceilos) + out: DataFrame = pd.concat(ceilos) # Sort the timesteps in order, and reset the index - out = out.sort_values(['dt', 'alt']).reset_index(drop=True) + out = out.sort_values(['dt', 'height']).reset_index(drop=True) # Fix the dtypes for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): @@ -183,7 +185,7 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, return out -def canonical_demo_data() -> pd.DataFrame: +def canonical_demo_data() -> DataFrame: """ This function creates the canonical ampycloud demonstration dataset, that can be used to illustrate the full behavior of the algorithm. @@ -194,17 +196,24 @@ def canonical_demo_data() -> pd.DataFrame: # Create the "famous" mock dataset n_ceilos = 4 - lookback_time = 1200 - hit_gap = 30 + lookback_time = 900 + hit_gap = 15 - lyrs = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 0.1, 'period': 10, 'amplitude': 0}, - {'alt': 2000, 'alt_std': 100, 'sky_cov_frac': 0.5, 'period': 10, 'amplitude': 0}, - {'alt': 5000, 'alt_std': 200, 'sky_cov_frac': 1, 'period': 2400, 'amplitude': 1000}, + lyrs = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 0.1, 'period': 10, 'amplitude': 0}, + {'height': 2000, 'height_std': 100, 'sky_cov_frac': 0.5, 'period': 10, 'amplitude': 0}, + {'height': 5000, 'height_std': 200, 'sky_cov_frac': 1, 'period': 1800, + 'amplitude': 1000}, ] # Reset the random seed, but only do this temporarily, so as to not mess things up for the user. with utils.tmp_seed(42): # Actually generate the mock data - out = mock_layers(n_ceilos, lookback_time, hit_gap, lyrs) + out: DataFrame = mock_layers(n_ceilos, lookback_time, hit_gap, lyrs) + + # Add a rogue hit, to illustrate the fact that layers that fall below Theta_0 are not + # reported in the diagnostic diagram, but remain "counted". + # To avoid confusing users, we will replace an existing hit: pick one closest to -800s. 
+ swap_id = (abs(out[(out['ceilo'] == '1')*(out['type'] == 2)]['dt']-800)).argmin() + out.at[swap_id, 'height'] = 3100.0 return out diff --git a/src/ampycloud/utils/performance.py b/src/ampycloud/utils/performance.py index 655e2d3..bbf74a5 100644 --- a/src/ampycloud/utils/performance.py +++ b/src/ampycloud/utils/performance.py @@ -44,8 +44,6 @@ def get_speed_benchmark(niter: int = 10) -> tuple: _, _ = demo() dts += [(datetime.now()-start).total_seconds()] - dts = np.array(dts) - logger.info('ampycloud demo() exec-time from %i runs:', niter) logger.info(' mean [std]: %.2fs [%.2fs]', np.mean(dts), np.std(dts)) logger.info(' median [min; max]: %.2f [%.2fs; %.2fs]', diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 072b78c..331de0e 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2022-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -10,11 +10,11 @@ # Import from Python import logging +from typing import Union import warnings import contextlib import copy import numpy as np -import numpy.typing as npt import pandas as pd # Import from this package @@ -28,7 +28,7 @@ @log_func_call(logger) def check_data_consistency(pdf: pd.DataFrame, - req_cols: dict = None) -> pd.DataFrame: + req_cols: Union[dict, None] = None) -> pd.DataFrame: """ Assesses whether a given :py:class:`pandas.DataFrame` is compatible with the requirements of ampycloud. @@ -50,7 +50,7 @@ def check_data_consistency(pdf: pd.DataFrame, column names/types (formally defined in :py:data:`ampycloud.hardcoded.REQ_DATA_COLS`): :: - 'ceilo'/pd.StringDtype(), 'dt'/float, 'alt'/float, 'type'/int + 'ceilo'/pd.StringDtype(), 'dt'/float, 'height'/float, 'type'/int The ``ceilo`` column contains the names/ids of the ceilometers as ``pd.StringDtype()``. See the pandas @@ -62,8 +62,8 @@ def check_data_consistency(pdf: pd.DataFrame, time of the METAR message, such that ``dt`` values are negative, with the smallest one corresponding to the oldest measurement. - The ``alt`` column contains the cloud base hit altitudes reported by the ceilometers, in ft - above ground. + The ``height`` column contains the cloud base hit heights reported by the ceilometers, in ft + above aerodrome level. The ``type`` column contains integers that correspond to the hit *sequence id*. If a given ceilometer is reporting multiple hits for a given timestep (corresponding to a cloud level 1, @@ -71,11 +71,11 @@ def check_data_consistency(pdf: pd.DataFrame, ``2``, ``3``, etc ... Any data point with a ``type`` of ``-1`` will be flagged in the ampycloud plots as a vertical Visibility (VV) hit, **but it will not be treated any differently than any other regular hit**. Type ``0`` corresponds to no (cloud) detection, in which case the - corresponding hit altitude should be a NaN. + corresponding hit height should be a NaN. Important: A **non-detection** corresponds to a valid measurement with a ``dt`` value, a ``type 0``, - and ``NaN`` as the altitude. It should not be confused with a **non-observation**, + and ``NaN`` as the height. It should not be confused with a **non-observation**, when no data was acquired at all ! If it all sounds confusing, it is possible to obtain an example of the required data format @@ -96,15 +96,24 @@ def check_data_consistency(pdf: pd.DataFrame, * ``pdf`` is not a :py:class:`pandas.DataFrame`. * ``pdf`` is missing a required column. 
* ``pdf`` has a length of 0. + * ``pdf`` has duplicated rows. + * any time step for any ceilometer corresponds to both a type 0 (no hit) and not 0 (some + hit). + * any time step for any ceilometer corresponds to both a type -1 (VV hit) and not -1 (some + hit/no hit). + + The latter check implies that ampycloud cannot be fed a VV hit in parallel to a cloud base hit. + Should a specific ceilometer return VV hits in parallel to cloud base hits, it is up to the user + to decide whether to feed one or the other. In addition, this will raise an :py:class:`ampycloud.errors.AmpycloudWarning` if: * any of ``pdf`` column type is not as expected. Note that in this case, the code will try to correct the type on the fly. * ``pdf`` has any superfluous columns. In this case, the code will drop them automatically. - * Any hit altitude is negative. - * Any ``type 0`` hit has a non-NaN altitude. - * Any ``type 1`` hit has a NaN altitude. + * Any hit height is negative. + * Any ``type 0`` hit has a non-NaN height. + * Any ``type 1`` hit has a NaN height. * Any ``type 2`` hit does not have a coincident ``type 1`` hit. * Any ``type 3`` hit does not have a coincident ``type 2`` hit. @@ -148,21 +157,38 @@ def check_data_consistency(pdf: pd.DataFrame, warnings.warn(f'Column {key} is not required by ampycloud.', AmpycloudWarning) logger.warning('Dropping the superfluous %s column from the input data.', key) - data.drop((key), axis=1, inplace=True) - - # A brief sanity check of the altitudes. We do not issue Errors, since the code can cope + data.drop(key, axis=1, inplace=True) + + # Check for any duplicated entry, which would make no sense. + if (duplic := data.duplicated()).any(): + raise AmpycloudError('Duplicated hits in the input data:\n' + f'{data[duplic].to_string(index=False)}') + + # Check for inconsistencies + # 1 - A non-detection should not be coincident with a detection + # 2 - A VV hit should not be coincident with a hit or a non-detection + for hit_type in [0, -1]: + nodets = data[data['type'] == hit_type][['dt', 'ceilo']] + dets = data[data['type'] != hit_type][['dt', 'ceilo']] + merged = dets.merge(nodets, how='inner', on=['dt', 'ceilo']) + if len(merged) > 0: + raise AmpycloudError('Inconsistent input data ' + f'(simultaneous type {hit_type} and !{hit_type}):\n' + f'{merged.to_string(index=False)}') + + # A brief sanity check of the heights. We do not issue Errors, since the code can cope # with those elements: we simply raise Warnings.
msgs = [] - if np.any(data.loc[:, 'alt'].values < 0): - msgs += ['Some hit altitudes are negative ?!'] - if not np.all(np.isnan(data.loc[data.type == 0, 'alt'])): - msgs += ['Some type=0 hits have non-NaNs altitude values ?!'] - if np.any(np.isnan(data.loc[data.type == 1, 'alt'])): - msgs += ['Some type=1 hits have NaNs altitude values ?!'] - if not np.all(np.in1d(data.loc[data.type == 2, 'dt'].values, + if np.any(data.loc[:, 'height'].values < 0): + msgs += ['Some hit heights are negative ?!'] + if not np.all(np.isnan(data.loc[data.type == 0, 'height'])): + msgs += ['Some type=0 hits have non-NaN height values ?!'] + if np.any(np.isnan(data.loc[data.type == 1, 'height'])): + msgs += ['Some type=1 hits have NaN height values ?!'] + if not np.all(np.isin(data.loc[data.type == 2, 'dt'].values, data.loc[data.type == 1, 'dt'].values)): msgs += ['Some type=2 hits have no coincident type=1 hits ?!'] - if not np.all(np.in1d(data.loc[data.type == 3, 'dt'].values, + if not np.all(np.isin(data.loc[data.type == 3, 'dt'].values, data.loc[data.type == 2, 'dt'].values)): msgs += ['Some type=3 hits have no coincident type=2 hits ?!'] @@ -206,7 +232,7 @@ def tmp_seed(seed: int): @log_func_call(logger) -def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: list = None) -> dict: +def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: Union[list, None] = None) -> dict: """ Update a given (nested) dictionary given a second (possibly incomplete) one. Args: @@ -241,23 +267,22 @@ def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: list = None) -> dic return ref_dict -def calc_base_alt( - vals: npt.ArrayLike, - lookback_perc: int, - alt_perc: int, - ) -> float: - """Calculate the layer base altitude. +def calc_base_height(vals: np.ndarray, + lookback_perc: int, + height_perc: int, + ) -> float: + """Calculate the layer base height. Args: vals (np.ndarray): Ceilometer hits of a given layer. Must be a flat array/ Series of scalars and ordered in time, most recent entries last. lookback_perc (int): Percentage of points to take into account. 100% would correspond to all points, 50% to the recent half, etc. - alt_perc (int): Percentage of points that should be neglected when calculating + height_perc (int): Percentage of points that should be neglected when calculating the base height. Base height will be the minimum of the remaining points. Returns: - float: The layer base altitude. + float: The layer base height. Raises: AmpycloudError: Raised if the array passed to the n_largest percentile calculation @@ -267,7 +292,7 @@ def calc_base_alt( n_latest_elements = vals[- int(len(vals) * lookback_perc / 100):] if len(n_latest_elements) == 0: raise AmpycloudError( - 'Cloud base calculation got an empty array.' - 'Maybe check lookback percentage (is set to %i)' %lookback_perc + 'Cloud base calculation got an empty array. ' + f'Maybe check lookback percentage ? (currently set to {lookback_perc})' ) - return np.percentile(n_latest_elements, alt_perc) + return np.percentile(n_latest_elements, height_perc) diff --git a/src/ampycloud/version.py b/src/ampycloud/version.py index 8b104d3..21f9e0c 100644 --- a/src/ampycloud/version.py +++ b/src/ampycloud/version.py @@ -9,4 +9,4 @@ """ #:str: the one-and-only place where the ampycloud version is set.
-VERSION = '1.0.0'
+VERSION = '2.0.0'
diff --git a/src/ampycloud/wmo.py b/src/ampycloud/wmo.py
index d5a7597..35097cb 100644
--- a/src/ampycloud/wmo.py
+++ b/src/ampycloud/wmo.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -10,7 +10,7 @@
 
 # Import from Python
 import logging
-from typing import Union
+from typing import Optional, Union
 import numpy as np
 
 # Import from ampycloud
@@ -79,7 +79,7 @@ def perc2okta(val: Union[int, float, np.ndarray]) -> np.ndarray:
 
 
 @log_func_call(logger)
-def okta2code(val: int) -> str:
+def okta2code(val: int) -> Optional[str]:
     """ Convert an okta value to a METAR code.
 
     Args:
@@ -165,12 +165,12 @@ def okta2symb(val: int, use_metsymb: bool = False) -> str:
 
 
 @log_func_call(logger)
-def alt2code(val: Union[int, float]) -> str:
-    """ Function that converts a given altitude in hundreds of ft (3 digit number),
+def height2code(val: Union[int, float]) -> str:
+    """ Function that converts a given height into hundreds of ft (3 digit number),
     e.g. 5000 ft -> 050, 500 ft -> 005.
 
     Args:
-        val (int, float): the altitude to convert, in feet.
+        val (int, float): the height to convert, in feet.
 
     Returns:
         str: the corresponding METAR code chunk
diff --git a/test/ampycloud/plots/test_core.py b/test/ampycloud/plots/test_core.py
index c43a186..a39ed15 100644
--- a/test/ampycloud/plots/test_core.py
+++ b/test/ampycloud/plots/test_core.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -34,10 +34,10 @@ def test_large_dts(mpls):
 
     # Let's create some basic data
     data = pd.DataFrame([['1', 7e5, 1000, 1], ['1', 7e5+1, 1001, 1], ['1', 7e5+2, 1002, 1]],
-                        columns=['ceilo', 'dt', 'alt', 'type'])
+                        columns=['ceilo', 'dt', 'height', 'type'])
     data['ceilo'] = data['ceilo'].astype(pd.StringDtype())
     data['dt'] = data['dt'].astype(float)
-    data['alt'] = data['alt'].astype(float)
+    data['height'] = data['height'].astype(float)
     data['type'] = data['type'].astype(int)
 
     # Run ampycloud
@@ -68,10 +68,10 @@ def test_direct_prms(mpls):
 
     # Let's create some basic data
     data = pd.DataFrame([['1', 0, 1000, 1], ['1', 60, 1001, 1], ['1', 120, 1002, 1]],
-                        columns=['ceilo', 'dt', 'alt', 'type'])
+                        columns=['ceilo', 'dt', 'height', 'type'])
     data['ceilo'] = data['ceilo'].astype(pd.StringDtype())
     data['dt'] = data['dt'].astype(float)
-    data['alt'] = data['alt'].astype(float)
+    data['height'] = data['height'].astype(float)
     data['type'] = data['type'].astype(int)
 
     # Run ampycloud
@@ -137,10 +137,10 @@ def test_empty_plot(mpls):
 
     # Let's create some data with only NaNs
     data = pd.DataFrame([['1', -100, np.nan, 0], ['1', -99, np.nan, 0]],
-                        columns=['ceilo', 'dt', 'alt', 'type'])
+                        columns=['ceilo', 'dt', 'height', 'type'])
     data['ceilo'] = data['ceilo'].astype(pd.StringDtype())
     data['dt'] = data['dt'].astype(float)
-    data['alt'] = data['alt'].astype(float)
+    data['height'] = data['height'].astype(float)
     data['type'] = data['type'].astype(int)
 
     # Run ampycloud
diff --git a/test/ampycloud/plots/test_tools.py b/test/ampycloud/plots/test_tools.py
index 60bbe3a..dc9f217 100644
--- a/test/ampycloud/plots/test_tools.py
+++ b/test/ampycloud/plots/test_tools.py
@@ -13,7 +13,7 @@
 
 # Import from ampycloud
 from ampycloud import dynamic
-from ampycloud.plots.tools import valid_styles, set_mplstyle
+from ampycloud.plots.tools import valid_styles, set_mplstyle, texify
 
 
 def test_valid_styles():
@@ -47,3 +47,12 @@ def check_plot_context():
     # Finally, let's check that I can also alter the preamble in context
     dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] = 'latex'
     assert 'metsymb' not in check_plot_context()[1]
+
+
+def test_texify():
+    """ Test the texify function on a string in a LaTeX context. """
+
+    mpl.rcParams['text.usetex'] = True
+    result = texify("some $fancy$ label with % and _")
+    assert result == "some $fancy$ label with \\% and \\_"
+    mpl.rcParams['text.usetex'] = False
diff --git a/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv b/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv
index 100eeec..df8d761 100644
--- a/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv
+++ b/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final05,-897.0,11310.0,1
 Final05,-897.0,12270.0,2
 Final05,-882.0,11280.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv b/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv
index dbca8c4..1181f72 100644
--- a/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv
+++ b/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-1189.0,7110.0,1
 PO,-1174.0,7140.0,1
 PO,-1159.0,7170.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv b/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv
index 00209b8..ec10b06 100644
--- a/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv
+++ b/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-889.0,7200.0,1
 PO,-874.0,7200.0,1
 PO,-859.0,7200.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv b/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv
index 82a1912..94f7ba0 100644
--- a/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv
+++ b/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final05,-886.0,570.0,1
 Final05,-871.0,570.0,1
 Final05,-856.0,570.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv b/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv
index 15d7ff6..67d7bb6 100644
--- a/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv
+++ b/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final23,-889.0,6570.0,1
 Final23,-874.0,6540.0,1
 Final23,-859.0,6600.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv b/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv
index 059b5d1..99f97a2 100644
--- a/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-889.0,480.0,1
 PO,-874.0,480.0,1
 PO,-859.0,480.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv b/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv
index d9c457e..a5d2848 100644
--- a/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-897.0,,0
 PO,-882.0,,0
 PO,-867.0,,0
diff --git a/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv b/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv
index c179467..eb04a67 100644
--- a/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-1200.0,11880.0,1
 PO,-1185.0,11670.0,1
 PO,-1170.0,11400.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv b/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv
index 0db0e0a..26ef5c6 100644
--- a/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final23,-896.0,3330.0,1
 Final23,-881.0,3330.0,1
 Final23,-866.0,3390.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv b/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv
index 2a0b216..6e18ec8 100644
--- a/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 RWY05,-898.0,3120.0,1
 RWY05,-883.0,3120.0,1
 RWY05,-868.0,3120.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv b/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv
index 9f9531f..8a56458 100644
--- a/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final05,-242.0,,0
 Final05,-227.0,,0
 Final05,-92.0,,0
diff --git a/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv b/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv
index 3f63d4f..fbf9016 100644
--- a/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 PO,-900.0,5850.0,1
 PO,-885.0,5970.0,1
 PO,-870.0,5820.0,1
diff --git a/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv b/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv
index 5ebd888..12bd4ac 100644
--- a/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv
+++ b/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 RWY05,-891.0,330.0,-1
 RWY05,-876.0,330.0,-1
 RWY05,-861.0,330.0,-1
diff --git a/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv b/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv
index ee02838..b73d3d7 100644
--- a/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv
+++ b/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 CL,1688369725.213,4620.0,1
 CL,1688369710.293,4620.0,1
 CL,1688369695.173,4620.0,1
diff --git a/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv b/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv
index 841375a..636e647 100644
--- a/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv
+++ b/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 Final_14,-891.0,3540.0,1
 Final_14,-876.0,1980.0,1
 Final_14,-861.0,5430.0,1
diff --git a/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv b/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv
index 90809f0..0bf567f 100644
--- a/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv
+++ b/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 3,-1181.5671950570443,2672.561745382283,1
 0,-1181.4743327263454,2740.7140169791196,1
 1,-1172.3053207784762,,0
diff --git a/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv b/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv
index 6e72ebf..62bdf55 100644
--- a/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv
+++ b/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv
@@ -1,4 +1,4 @@
-ceilo,dt,alt,type
+ceilo,dt,height,type
 0,-1197.5049952437246,3262.4809940488567,1
 0,-1197.5049952437246,4483.394304396294,2
 2,-1197.1492609319444,3073.9448243991233,1
diff --git a/test/ampycloud/test_core.py b/test/ampycloud/test_core.py
index 75dc17c..a703a20 100644
--- a/test/ampycloud/test_core.py
+++ b/test/ampycloud/test_core.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -45,15 +45,24 @@ def test_reset_prms():
     """ Test the reset_prms routine. """
 
    # First, let's change one of the dynamic parameters
-    ref_val = dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0']
+    ref_val0 = dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0']
+    ref_val8 = dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8']
+
     dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = -1
     assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == -1
-    assert dynamic.AMPYCLOUD_PRMS['SLICING_PRMS']['alt_scale_mode'] == 'minmax-scale'
 
-    # Then try to reset it
+    # Try to reset this prm specifically
+    reset_prms(which='MAX_HITS_OKTA0')
+    assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val0
+    assert dynamic.AMPYCLOUD_PRMS['SLICING_PRMS']['height_scale_mode'] == 'minmax-scale'
+
+    # Then try to reset all of them at once
+    dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = -1
+    dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8'] = -1
     reset_prms()
-    assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val
+    assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val0
+    assert dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8'] == ref_val8
 
 
 def test_run():
@@ -67,9 +76,9 @@ def test_run():
     # Create some fake data to get started
     # 1 very flat layer with no gaps
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0.5,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0.5,
                                      'period': 100, 'amplitude': 0},
-                                    {'alt': 2000, 'alt_std': 10, 'sky_cov_frac': 0.5,
+                                    {'height': 2000, 'height_std': 10, 'sky_cov_frac': 0.5,
                                      'period': 100, 'amplitude': 0}])
 
     out = run(mock_data)
@@ -82,7 +91,7 @@ def test_run():
 
     # Test the ability to set specific parameters locally only
     out = run(mock_data, prms={'MSA': 0})
-    assert out.metar_msg() == 'NCD'
+    assert out.metar_msg() == 'NSC'
     assert dynamic.AMPYCLOUD_PRMS['MSA'] is None
 
     # Test that warnings are being raised if a bad parameter is being given
@@ -99,7 +108,7 @@ def test_run_single_point():
 
     # Let's create some data with a single valid point
     data = pd.DataFrame([['1', -100, 2000, 1], ['1', -99, np.nan, 0]],
-                        columns=['ceilo', 'dt', 'alt', 'type'])
+                        columns=['ceilo', 'dt', 'height', 'type'])
 
     # Set the proper column types
     for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
         data[col] = data.loc[:, col].astype(tpe)
diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py
index 75be198..204b3de 100644
--- a/test/ampycloud/test_data.py
+++ b/test/ampycloud/test_data.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -31,7 +31,7 @@ def test_ceilochunk_init():
     # Create some fake data to get started
     # 1 very flat layer with no gaps
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1,
                                      'period': 100, 'amplitude': 0}])
     # The following line is required as long as the mocker module issues mock data with type 99.
     mock_data.iloc[-1, mock_data.columns.get_loc('type')] = 1
@@ -53,7 +53,7 @@ def test_ceilochunk_init():
 
     # And now again, but this time with a large hit buffer that covers all the data
     dynamic.AMPYCLOUD_PRMS['MSA'] = 0
-    dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = mock_data['alt'].max() + 10
+    dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = mock_data['height'].max() + 10
     chunk = CeiloChunk(mock_data)
 
     assert len(chunk.data) == len(mock_data)
@@ -67,9 +67,9 @@ def test_ceilochunk_init():
     assert dynamic.AMPYCLOUD_PRMS['MSA'] == 0
 
     # Check the ability to set nested parameters in one go
-    chunk = CeiloChunk(mock_data, prms={'GROUPING_PRMS': {'alt_pad_perc': 'test'}})
-    assert chunk.prms['GROUPING_PRMS']['alt_pad_perc'] == 'test'
-    assert dynamic.AMPYCLOUD_PRMS['GROUPING_PRMS']['alt_pad_perc'] == +10
+    chunk = CeiloChunk(mock_data, prms={'GROUPING_PRMS': {'height_pad_perc': 'test'}})
+    assert chunk.prms['GROUPING_PRMS']['height_pad_perc'] == 'test'
+    assert dynamic.AMPYCLOUD_PRMS['GROUPING_PRMS']['height_pad_perc'] == +10
 
     # Check that warnings are raised in case bad parameters are given
     with warns(AmpycloudWarning):
@@ -80,6 +80,33 @@ def test_ceilochunk_init():
 
     reset_prms()
 
 
+@mark.parametrize('height,expected_flag', [
+    param(1000, False, id='low clouds'),
+    param(15000, True, id='high clouds'),
+])
+def test_clouds_above_msa_buffer_flag(height: int, expected_flag: bool):
+    """ Test the high clouds flagging routine. """
+
+    dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3
+    dynamic.AMPYCLOUD_PRMS['MSA'] = 10000
+    dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000
+
+    n_ceilos = 4
+    lookback_time = 1200
+    rate = 30
+    # Create some fake data to get started
+    # 1 very flat layer with no gaps
+    mock_data = mocker.mock_layers(
+        n_ceilos, lookback_time, rate, [{
+            'height': height, 'height_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0
+        }]
+    )
+    chunk = CeiloChunk(mock_data)
+    assert chunk.clouds_above_msa_buffer == expected_flag
+
+    reset_prms()
+
+
 def test_ceilochunk_basic():
     """ Test the basic methods of the CeiloChunk class. """
 
@@ -90,7 +117,7 @@ def test_ceilochunk_basic():
     # Create some fake data to get started
     # 1 very flat layer with no gaps
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1,
                                      'period': 100, 'amplitude': 0}])
 
     # Instantiate a CeiloChunk entity ...
@@ -139,7 +166,7 @@ def test_ceilochunk_basic():
     assert chunk.metar_msg(which='groups') == 'OVC009'
 
 
-@mark.parametrize('altitude1,altitude2,altitude3,ngroups_expected', [
+@mark.parametrize('height1,height2,height3,ngroups_expected', [
     param(250., 800., 1200., 3, id='gt min sep'),
     param(250., 500., 1200., 3, id='eq min sep'),
     param(250., 350., 1200., 2, id='lt min sep'),
@@ -147,16 +174,16 @@ def test_ceilochunk_basic():
     param(250., 350., 400., 1, id='multiple overlaps with second merge'),
 ])
 def test_group_separation(
-    altitude1: float, altitude2: float, altitude3: float, ngroups_expected: int
+    height1: float, height2: float, height3: float, ngroups_expected: int
 ):
     """Test if the separation of close groups works as intended."""
 
-    #create some fake data:
-    fake_hits = [altitude1] * 50 + [altitude2] * 50 + [altitude3] * 50
+    # create some fake data:
+    fake_hits = [height1] * 50 + [height2] * 50 + [height3] * 50
     fake_data = pd.DataFrame({
         'ceilo': ['Ceilometer.PO'] * 150,
         'dt': [t for t in range(-1500, 0, 10)],
-        'alt': fake_hits,
+        'height': fake_hits,
         'type': [1] * 150,
     })
     fake_grp_ids = [1] * 50 + [2] * 50 + [3] * 50
@@ -173,6 +200,7 @@ def test_group_separation(
     reset_prms()
 
 
+
 def test_bad_layer_sep_lims():
     """ Test that giving problematic layer separation limits does raise an error. """
 
@@ -187,7 +215,7 @@ def test_bad_layer_sep_lims():
     # Create some fake data to get started
     # 1 very flat layer with no gaps
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1,
                                      'period': 100, 'amplitude': 0}])
 
     # Instantiate a CeiloChunk entity ...
@@ -213,7 +241,7 @@ def test_ceilochunk_nocld():
     # Create some fake data to get started
     # 1 very flat layer with no gaps
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0,
                                      'period': 100, 'amplitude': 0}])
 
     # Instantiate a CeiloChunk entity ...
@@ -228,6 +256,41 @@ def test_ceilochunk_nocld():
 
     assert chunk.metar_msg() == 'NCD'
 
 
+@mark.parametrize('height', [
+    param(10500, id='in buffer'),
+    param(15000, id='above buffer'),
+])
+def test_ceilochunk_highcld(height):
+    """ Test the methods of CeiloChunks when high clouds are seen in the interval. """
+
+    dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3
+    dynamic.AMPYCLOUD_PRMS['MSA'] = 10000
+    dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000
+
+    n_ceilos = 4
+    lookback_time = 1200
+    rate = 30
+    # Create some fake data to get started
+    # 1 very flat layer with no gaps
+    mock_data = mocker.mock_layers(
+        n_ceilos, lookback_time, rate,
+        [{'height': height, 'height_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}]
+    )
+
+    # Instantiate a CeiloChunk entity ...
+    chunk = CeiloChunk(mock_data)
+
+    # Do the dance ...
+    chunk.find_slices()
+    chunk.find_groups()
+    chunk.find_layers()
+
+    # Assert the final METAR code is correct
+    assert chunk.metar_msg() == 'NSC'
+
+    reset_prms()
+
+
 def test_ceilochunk_2lay():
     """ Test the methods of CeiloChunks when 2 layers are seen in the interval. """
@@ -237,9 +300,9 @@ def test_ceilochunk_2lay():
 
     # Create some fake data to get started
     mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate,
-                                   [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0.5,
+                                   [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0.5,
                                      'period': 100, 'amplitude': 0},
-                                    {'alt': 2000, 'alt_std': 10, 'sky_cov_frac': 0.5,
+                                    {'height': 2000, 'height_std': 10, 'sky_cov_frac': 0.5,
                                      'period': 100, 'amplitude': 0}])
 
     # Instantiate a CeiloChunk entity ...
@@ -259,9 +322,8 @@ def test_layering_singlepts():
 
     mock_data = pd.DataFrame(np.array([['dummy', -1, 2300, 1],
                                        ['dummy', -1, 4000, 2],
-                                       ['dummy', -1, 4500, 3],
-                                       ['dummy', -1, np.nan, 0]]),
-                             columns=['ceilo', 'dt', 'alt', 'type'])
+                                       ['dummy', -1, 4500, 3]]),
+                             columns=['ceilo', 'dt', 'height', 'type'])
 
     # Set the proper column types
     for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
@@ -282,12 +344,12 @@ def test_layering_singlepts():
 
 
 def test_layering_singleval():
-    """ Test the layering step when there is a single altitude value. See #76 for details. """
+    """ Test the layering step when there is a single height value. See #76 for details. """
 
     data = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*170000, np.ones(30)])
 
     mock_data = pd.DataFrame(data.T,
-                             columns=['ceilo', 'dt', 'alt', 'type'])
+                             columns=['ceilo', 'dt', 'height', 'type'])
 
     # Set the proper column types
     for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
@@ -311,7 +373,7 @@ def test_coplanar_hull():
     data = np.array([np.ones(10), np.arange(0, 10, 1), np.arange(0, 10, 1), np.ones(10)])
 
     mock_data = pd.DataFrame(data.T,
-                             columns=['ceilo', 'dt', 'alt', 'type'])
+                             columns=['ceilo', 'dt', 'height', 'type'])
 
     # Set the proper column types
     for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
@@ -328,13 +390,13 @@ def test_coplanar_hull():
 
 
 def test_layering_dualeval():
-    """ Test the layering step when there are two single altitude values. See #78 for details. """
+    """ Test the layering step when there are two single height values. See #78 for details. """
 
     data1 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*120, np.ones(30)*1])
     data2 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*150, np.ones(30)*2])
 
     mock_data = pd.DataFrame(np.concatenate([data1, data2], axis=1).T,
-                             columns=['ceilo', 'dt', 'alt', 'type'])
+                             columns=['ceilo', 'dt', 'height', 'type'])
 
     # Set the proper column types
     for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
diff --git a/test/ampycloud/test_fluffer.py b/test/ampycloud/test_fluffer.py
index da8d90b..af1f627 100644
--- a/test/ampycloud/test_fluffer.py
+++ b/test/ampycloud/test_fluffer.py
@@ -30,7 +30,7 @@ def test_get_fluffiness():
         assert out[0] == 0
         assert np.array_equal(out[1][:, 1], pts[:, 1])
 
-    # Test the LOWESS warning issued with duplicat points
+    # Test the LOWESS warning issued with duplicate points
     pts = np.array([[0, 0], [0, 1], [1, 0]])
     # For some reason, pytest refuses to convert the LOWESS warning into an error ...
    # ... so I need to catch it a bit differently ... sigh !
diff --git a/test/ampycloud/test_layer.py b/test/ampycloud/test_layer.py
index a217335..27e0683 100644
--- a/test/ampycloud/test_layer.py
+++ b/test/ampycloud/test_layer.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -93,10 +93,10 @@ def test_unstable_layers():
         data = pd.read_csv(f)
 
     # Drop anything below 6500 ft
-    data = data.drop(data[data['alt'] < 6500].index)
+    data = data.drop(data[data['height'] < 6500].index)
 
     # Let's run the Gaussian Mixture Modelling 100 times ...
-    out = [ncomp_from_gmm(data['alt'].to_numpy(),
+    out = [ncomp_from_gmm(data['height'].to_numpy(),
                           scores='BIC',
                           rescale_0_to_x=100,
                           min_sep=200,
                           random_seed=45,
@@ -108,7 +108,7 @@ def test_unstable_layers():
     # Now do it once, but checking that overly-thin layers do not get split-up
     # Do I issue a warning if the min_sep is dangerously small ?
     with pytest.warns(AmpycloudWarning):
-        best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(),
+        best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(),
                                           scores='BIC',
                                           rescale_0_to_x=100,
                                           min_sep=0,
                                           random_seed=39,
@@ -116,7 +116,7 @@ def test_unstable_layers():
     # With this specific seed, I should be finding 3 layers
     assert best_ncomp == 3
     # With this other seed, I should get 2 components
-    best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(),
+    best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(),
                                       scores='BIC',
                                       rescale_0_to_x=100,
                                       min_sep=0,
                                       random_seed=45,
@@ -124,7 +124,7 @@ def test_unstable_layers():
     assert best_ncomp == 2
 
     # Once I take into account a suitable min_sep, do the layers not get split anymore ?
-    best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(),
+    best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(),
                                       scores='BIC',
                                       rescale_0_to_x=100,
                                       min_sep=200,
                                       random_seed=45,
diff --git a/test/ampycloud/test_wmo.py b/test/ampycloud/test_wmo.py
index 9356143..772a6a7 100644
--- a/test/ampycloud/test_wmo.py
+++ b/test/ampycloud/test_wmo.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -12,7 +12,7 @@
 import numpy as np
 
 # Import from this package
-from ampycloud.wmo import perc2okta, alt2code
+from ampycloud.wmo import perc2okta, height2code
 
 
 def test_perc2okta():
@@ -24,10 +24,10 @@ def test_perc2okta():
                           np.array([0, 1, 2, 3, 4, 5, 6, 7, 8]))
 
 
-def test_alt2code():
-    """ Test the alt2code function, inculding the descaling mode. """
-    assert alt2code(99.9) == '000'
-    assert alt2code(100) == '001'
-    assert alt2code(5555) == '055'
-    assert alt2code(12345) == '120'
+def test_height2code():
+    """ Test the height2code function, including the descaling mode. """
+    assert height2code(99.9) == '000'
+    assert height2code(100) == '001'
+    assert height2code(5555) == '055'
+    assert height2code(12345) == '120'
diff --git a/test/ampycloud/utils/test_mocker.py b/test/ampycloud/utils/test_mocker.py
index 333452d..a7c311e 100644
--- a/test/ampycloud/utils/test_mocker.py
+++ b/test/ampycloud/utils/test_mocker.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -23,7 +23,8 @@ def test_mock_layers():
     n_ceilos = 1
     lookback_time = 1200
     hit_gap = 60
-    layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}]
+    layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100,
+                   'amplitude': 0}]
 
     out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms)
 
     # Correct type ?
@@ -32,7 +33,7 @@ def test_mock_layers():
     # Correct number of points ?
     assert len(out) == 1200/60
     # No holes ?
-    assert not np.any(out['alt'].isna())
+    assert not np.any(out['height'].isna())
     assert not np.any(out['dt'].isna())
     # Ordered chronologically ?
     assert out['dt'].is_monotonic_increasing
@@ -41,24 +42,25 @@ def test_mock_layers():
     n_ceilos = 2
     lookback_time = 1200
     hit_gap = 60
-    layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}]
+    layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 0.5, 'period': 100,
+                   'amplitude': 0}]
 
     out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms)
 
     # Correct number of points ?
     assert len(out) == 1200/60 * n_ceilos
     # Holes present ?
-    assert np.any(out['alt'].isna())
+    assert np.any(out['height'].isna())
     assert not np.any(out['dt'].isna())
     # In good numbers ?
-    assert len(out[out['alt'].isna()]) == len(out)/2
+    assert len(out[out['height'].isna()]) == len(out)/2
 
     # Now with more than 1 layer
     n_ceilos = 2
     lookback_time = 1200
     hit_gap = 60
-    layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1,
+    layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1,
                    'period': 100, 'amplitude': 0},
-                  {'alt': 10000, 'alt_std': 200, 'sky_cov_frac': 1,
+                  {'height': 10000, 'height_std': 200, 'sky_cov_frac': 1,
                    'period': 100, 'amplitude': 0}]
 
     out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms)
 
@@ -69,14 +71,14 @@ def test_mock_layers():
     n_ceilos = 2
     lookback_time = 1200
     hit_gap = 60
-    layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1,
+    layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1,
                    'period': 100, 'amplitude': 0},
-                  {'alt': 15000, 'alt_std': 200, 'sky_cov_frac': 1,
+                  {'height': 15000, 'height_std': 200, 'sky_cov_frac': 1,
                    'period': 100, 'amplitude': 0}]
 
     out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms)
 
     # Holes present ? There should be none, since the second layer is complete
-    assert np.any(~out['alt'].isna())
+    assert np.any(~out['height'].isna())
     assert not np.any(out['dt'].isna())
 
     # Make sure I have the correct number of timesteps
diff --git a/test/ampycloud/utils/test_utils.py b/test/ampycloud/utils/test_utils.py
index a1908c0..40569e2 100644
--- a/test/ampycloud/utils/test_utils.py
+++ b/test/ampycloud/utils/test_utils.py
@@ -1,5 +1,5 @@
 """
-Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS.
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS.
 
 Distributed under the terms of the 3-Clause BSD License.
 
@@ -16,7 +16,7 @@
 
 # Import from ampycloud
 from ampycloud.utils.utils import (check_data_consistency, tmp_seed,
-                                   adjust_nested_dict, calc_base_alt)
+                                   adjust_nested_dict, calc_base_height)
 from ampycloud.utils.mocker import canonical_demo_data
 from ampycloud.errors import AmpycloudError, AmpycloudWarning
 from ampycloud import hardcoded
@@ -35,59 +35,90 @@ def test_check_data_consistency():
     assert np.all(out == canonical_demo_data())
 
     # Now, let's check specific elements that should raise errors or warnings
+    ### ERRORS ###
     with raises(AmpycloudError):
         # Empty DataFrame
-        data = pd.DataFrame(columns=['ceilo', 'dt', 'alt', 'type'])
+        data = pd.DataFrame(columns=['ceilo', 'dt', 'height', 'type'])
         check_data_consistency(data)
     with raises(AmpycloudError):
         # Missing column
-        data = pd.DataFrame(np.array([['a', 1, 1]]), columns=['ceilo', 'alt', 'type'])
-        for col in ['ceilo', 'alt', 'type']:
+        data = pd.DataFrame(np.array([['a', 1, 1]]), columns=['ceilo', 'height', 'type'])
+        for col in ['ceilo', 'height', 'type']:
             data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
         check_data_consistency(data)
+    with raises(AmpycloudError):
+        # Duplicated hit
+        data = pd.DataFrame(np.array([['a', 0., 1, 1], ['a', 0., 1, 1]]),
+                            columns=['ceilo', 'dt', 'height', 'type'])
+        for col in ['ceilo', 'dt', 'height', 'type']:
+            data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
+        check_data_consistency(data)
+    with raises(AmpycloudError):
+        # Inconsistent hits - type 0 vs type !0
+        data = pd.DataFrame(np.array([['a', 0, 1, 1], ['a', 0, np.nan, 0]]),
+                            columns=['ceilo', 'dt', 'height', 'type'])
+        for col in ['ceilo', 'dt', 'height', 'type']:
+            data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
+        check_data_consistency(data)
+    with raises(AmpycloudError):
+        # Inconsistent vv hits - it must be either a VV hit, or a hit, but not both.
+        data = pd.DataFrame(np.array([['a', 0, 1, -1], ['a', 0, 2, 1]]),
+                            columns=['ceilo', 'dt', 'height', 'type'])
+        for col in ['ceilo', 'dt', 'height', 'type']:
+            data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
+        check_data_consistency(data)
+
+    # The following should NOT raise an error, i.e. two simultaneous hits from *distinct* ceilometers
+    data = pd.DataFrame(np.array([['a', 0, 1, -1], ['b', 0, np.nan, 0]]),
+                        columns=['ceilo', 'dt', 'height', 'type'])
+    for col in ['ceilo', 'dt', 'height', 'type']:
+        data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
+    check_data_consistency(data)
+
+    ### WARNINGS ###
     with warns(AmpycloudWarning):
         # Bad data type
-        data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'alt', 'type'])
+        data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'height', 'type'])
         check_data_consistency(data)
     with warns(AmpycloudWarning):
         # Extra key
         data = pd.DataFrame(np.array([['a', 0, 1, 1, 99]]),
-                            columns=['ceilo', 'dt', 'alt', 'type', 'extra'])
+                            columns=['ceilo', 'dt', 'height', 'type', 'extra'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
     with warns(AmpycloudWarning):
-        # Negative alts
+        # Negative heights
         data = pd.DataFrame(np.array([['a', 0, -1, 1]]),
-                            columns=['ceilo', 'dt', 'alt', 'type'])
+                            columns=['ceilo', 'dt', 'height', 'type'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
     with warns(AmpycloudWarning):
         # Type 0 should be NaN
         data = pd.DataFrame(np.array([['a', 0, 1, 0]]),
-                            columns=['ceilo', 'dt', 'alt', 'type'])
+                            columns=['ceilo', 'dt', 'height', 'type'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
     with warns(AmpycloudWarning):
         # Type 1 should not be NaN
         data = pd.DataFrame(np.array([['a', 0, np.nan, 1]]),
-                            columns=['ceilo', 'dt', 'alt', 'type'])
+                            columns=['ceilo', 'dt', 'height', 'type'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
     with warns(AmpycloudWarning):
         # Missing type 1 pts
         data = pd.DataFrame(np.array([['a', 0, 1, 2]]),
-                            columns=['ceilo', 'dt', 'alt', 'type'])
+                            columns=['ceilo', 'dt', 'height', 'type'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
     with warns(AmpycloudWarning):
         # Missing type 2 pts
         data = pd.DataFrame(np.array([['a', 0, 1, 3]]),
-                            columns=['ceilo', 'dt', 'alt', 'type'])
+                            columns=['ceilo', 'dt', 'height', 'type'])
         for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
             data[col] = data.loc[:, col].astype(tpe)
         check_data_consistency(data)
@@ -142,17 +173,17 @@ def test_adjust_nested_dict():
     assert out == new_dict
 
 
-@mark.parametrize('lookback_perc,alt_perc,q_expected', [
+@mark.parametrize('lookback_perc,height_perc,q_expected', [
     param(50, 90, 95, id='both params'),
-    param(100, 98, 98, id='alt_perc only'),
+    param(100, 98, 98, id='height_perc only'),
     param(42, 100, 100., id='lookback_only'),
 ])
-def test_calc_base_alt(
+def test_calc_base_height(
     lookback_perc: int,
-    alt_perc: int,
+    height_perc: int,
     q_expected: np.float64,
 ):
-    """Test the calculation of the slice/ group/ layer base altitude."""
+    """Test the calculation of the slice/group/layer base height."""
     vals = np.arange(1., 101.)
-    q = calc_base_alt(vals, lookback_perc, alt_perc)
+    q = calc_base_height(vals, lookback_perc, height_perc)
    np.testing.assert_almost_equal(q, q_expected, decimal=1)
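
A minimal usage sketch (not part of the diff above) illustrating the v2.0.0 renames exercised by these tests: input data now carries a 'height' column instead of 'alt', wmo.alt2code() has become wmo.height2code(), and reset_prms() accepts a which argument to reset a single parameter. The entry points (ampycloud.run, CeiloChunk.metar_msg, dynamic.AMPYCLOUD_PRMS) follow the calls made in test_core.py; the sample data values are illustrative assumptions only.

    # Hedged sketch, assuming ampycloud >= 2.0.0 is installed.
    import pandas as pd
    import ampycloud
    from ampycloud import dynamic
    from ampycloud.wmo import height2code

    # Ceilometer hits now use a 'height' column (ft above aerodrome level), not 'alt'.
    data = pd.DataFrame([['1', -120., 1500., 1], ['1', -60., 1510., 1], ['1', 0., 1490., 1]],
                        columns=['ceilo', 'dt', 'height', 'type'])
    data['ceilo'] = data['ceilo'].astype(pd.StringDtype())
    data['type'] = data['type'].astype(int)

    # Run the full slicing/grouping/layering sequence and report the METAR message.
    out = ampycloud.run(data)
    print(out.metar_msg())

    # height2code() replaces alt2code(): heights in ft map to 3-digit METAR chunks.
    assert height2code(5555) == '055'

    # reset_prms() can now reset a single parameter (new in v2.0.0) ...
    dynamic.AMPYCLOUD_PRMS['MSA'] = 10000
    ampycloud.reset_prms(which='MSA')
    # ... or all parameters at once, as before.
    ampycloud.reset_prms()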