Skip to content

Commit

Permalink
Merge pull request #108 from MeteoSwiss/bugfix/AMAROC-681-double-layers
Browse files Browse the repository at this point in the history
Bugfix/amaroc 681 double layers
  • Loading branch information
regDaniel authored Nov 14, 2023
2 parents 9011e29 + 2d6662d commit 52d643f
Show file tree
Hide file tree
Showing 14 changed files with 1,483 additions and 148 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dist/
eggs/
*.egg-info/
docs/source/modules/
.venv/

# MacOS stuff
*.DS_Store
Expand Down
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Scientific Developers:
- @fpavogt (Frédéric P.A. Vogt), [email protected]
- @regDaniel (Daniel Regenass), [email protected]
15 changes: 8 additions & 7 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v1.0.0]
### Added:
### Fixed:
### Changed:
- [fpavogt, 2023-11-05] Updated version to 1.0.0 for first full release, incl. misc documentation changes.
### Deprecated:
### Removed:
### Security:
### Added
- [regDaniel, 2023-11-09] Add minimum separation condition for grouping step
### Fixed
- [regDaniel, 2023-11-09] Fix #107 (no more reporting of two layers at same height)
### Changed
- [fpavogt, 2023-11-05] Updated version to 1.0.0 for first full release, incl. misc documentation changes.
- [regDaniel, 2023-11-09] Changed min sep in layering from layer mean to layer base altitude
- [regDaniel, 2023-11-10] Refactoring (modularization) of metarize method

## [v0.6.0.dev0]
### Added:
Expand Down
438 changes: 321 additions & 117 deletions src/ampycloud/data.py

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions src/ampycloud/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
def ncomp_from_gmm(vals: np.ndarray,
ncomp_max: int = 3,
min_sep: Union[int, float] = 0,
layer_base_params: dict[str, int] = {'lookback_perc': 100, 'alt_perc': 5},
scores: str = 'BIC',
rescale_0_to_x: float = None,
random_seed: int = 42,
Expand Down Expand Up @@ -190,6 +191,9 @@ def ncomp_from_gmm(vals: np.ndarray,
if len(np.unique(vals_orig)) == 1:
logger.debug('Skipping the GMM computation: all the values are the same.')
return (1, np.zeros(len(vals_orig)), None)
elif len(np.unique(vals_orig)) < ncomp_max:
ncomp_max = len(np.unique(vals_orig))
warnings.warn(f'Restricting ncomp_max to the max number of individual values: {ncomp_max}')

# Estimate the resolution of the data (by measuring the minimum separation between two data
# points).
Expand Down Expand Up @@ -240,16 +244,22 @@ def ncomp_from_gmm(vals: np.ndarray,
return best_ncomp, best_ids, abics

# If I found more than one component, let's make sure that they are sufficiently far apart.
# First, let's compute the mean component heights
mean_comp_heights = [np.mean(vals_orig[best_ids == i]) for i in range(ncomp[best_model_ind])]
# First, let's compute the component base altitude
base_comp_heights = [
utils.calc_base_alt(
vals_orig[best_ids == i].flatten(),
layer_base_params['lookback_perc'],
layer_base_params['alt_perc']
) for i in range(ncomp[best_model_ind])
]

# These may not be ordered, so let's keep track of the indices
# First, let's deal with the fact that they are not ordered.
comp_ids = np.argsort(mean_comp_heights)
comp_ids = np.argsort(base_comp_heights)

# Now loop throught the different components, check if they are sufficiently far apart,
# and merge them otherwise.
for (ind, delta) in enumerate(np.diff(np.sort(mean_comp_heights))):
for (ind, delta) in enumerate(np.diff(np.sort(base_comp_heights))):

# If the the delta is large enough, move on ...
if delta >= min_sep:
Expand Down
6 changes: 3 additions & 3 deletions src/ampycloud/plots/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def show_slices(self) -> None:

# Which hits are in the slice ?
in_slice = np.array(self._chunk.data['slice_id'] ==
self._chunk.slices.at[ind, 'original_id'])
self._chunk.slices.at[ind, 'cluster_id'])

# Create an array of facecolors ... choose them from my set of colors
base_clr = all_clrs[ind % len(all_clrs)]
Expand Down Expand Up @@ -261,7 +261,7 @@ def show_groups(self, show_points: bool = False) -> None:
if show_points:
# Which hits are in the group ?
in_group = np.array(self._chunk.data['group_id'] ==
self._chunk.groups.at[ind, 'original_id'])
self._chunk.groups.at[ind, 'cluster_id'])

# I can finally show the points ...
self._axs[0].scatter(self._chunk.data[in_group]['dt'],
Expand Down Expand Up @@ -308,7 +308,7 @@ def show_layers(self) -> None:

# Which hits are in the layer?
in_layer = np.array(self._chunk.data['layer_id'] ==
self._chunk.layers.at[ind, 'original_id'])
self._chunk.layers.at[ind, 'cluster_id'])

# I can finally show the points ...
self._axs[0].scatter(self._chunk.data[in_layer]['dt'],
Expand Down
23 changes: 12 additions & 11 deletions src/ampycloud/prms/ampycloud_default_prms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ LOWESS:
# Number of LOWESS fitting iterations
it: 3

# Minimum separation (in ft) between the base altitudes of identified layers inside groups
# with a base altitude in the bins set by min_sep_lims. Any layers separated by less than this
# value will be remerged into one layer. Any value < than 5 times the vertical resolution of the data will
# raise a warning, as it may lead to an over-layering of groups. For the ampycloud METAR
# messages to be consistent with the ICAO rules of layer selection, this should be >=100 ft
# below 10'000 ft.
MIN_SEP_VALS: [250, 1000]
# Altitude threshold (in ft) separating the min_sep_val elements.
# The 0 and + infty extrema will be automatically added to the list by ampycloud.
# Just list here the intermediate steps.
MIN_SEP_LIMS: [10000]

# Slicing parameters
SLICING_PRMS:
# Clustering distance threshold
Expand Down Expand Up @@ -76,17 +88,6 @@ GROUPING_PRMS:
LAYERING_PRMS:
# Minimum okta value below which cluster splitting is not attempted
min_okta_to_split: 2
# Minimum separation (in ft) between the mean altitude of identified layers inside groups
# with a base altitude in the bins set by min_sep_lims. Any layers separated by less than this
# value will not be split. Any value < than 5 times the vertical resolution of the data will
# raise a warning, as it may lead to an over-layering of groups. For the ampycloud METAR
# messages to be consistent with the ICAO rules of layer selection, this should be >=100 ft
# below 10'000 ft.
min_sep_vals: [250, 1000]
# Altitude threshold (in ft) separating the min_sep_val elements.
# The 0 and + infty extrema will be automatically added to the list by ampycloud.
# Just list here the intermediate steps.
min_sep_lims: [10000]
gmm_kwargs:
# Whether to use 'BIC' or 'AIC' scores to decide which model is "best".
scores: BIC
Expand Down
33 changes: 33 additions & 0 deletions src/ampycloud/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import contextlib
import copy
import numpy as np
import numpy.typing as npt
import pandas as pd

# Import from this package
Expand Down Expand Up @@ -238,3 +239,35 @@ def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: list = None) -> dic
ref_dict[key] = item

return ref_dict


def calc_base_alt(
vals: npt.ArrayLike,
lookback_perc: int,
alt_perc: int,
) -> float:
"""Calculate the layer base altitude.
Args:
vals (npt.ArrayLike): Ceilometer hits of a given layer. Must be a flat
array/ Series of scalars and ordered in time, most recent entries last.
lookback_perc (int): Percentage of points to take into account. 100% would
correspond to all points, 50% to the recent half, etc.
alt_perc (int): Percentage of points that should be neglected when calculating
the base height. Base height will be the minimum of the remaining points.
Returns:
float: The layer base altitude.
Raises:
AmpycloudError: Raised if the array passed to the n_largest percentile calculation
is empty.
"""
n_latest_elements = vals[- int(len(vals) * lookback_perc / 100):]
if len(n_latest_elements) == 0:
raise AmpycloudError(
'Cloud base calculation got an empty array.'
'Maybe check lookback percentage (is set to %i)' %lookback_perc
)
return np.percentile(n_latest_elements, alt_perc)
Loading

0 comments on commit 52d643f

Please sign in to comment.