From 9de97c0eae8f407cb728b648aa9492d1bf47a4bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Wed, 19 Jun 2024 09:02:53 +0000 Subject: [PATCH 01/22] WIP --- pyaerocom/aeroval/coldatatojson_engine.py | 5 +++++ pyaerocom/aeroval/mda8.py | 23 +++++++++++++++++++++++ tests/aeroval/test_mda8.py | 17 +++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 pyaerocom/aeroval/mda8.py create mode 100644 tests/aeroval/test_mda8.py diff --git a/pyaerocom/aeroval/coldatatojson_engine.py b/pyaerocom/aeroval/coldatatojson_engine.py index 67cb22933..b19c5b3c2 100644 --- a/pyaerocom/aeroval/coldatatojson_engine.py +++ b/pyaerocom/aeroval/coldatatojson_engine.py @@ -32,6 +32,7 @@ ) from pyaerocom.aeroval.exceptions import ConfigError from pyaerocom.aeroval.json_utils import write_json +from pyaerocom.aeroval.mda8 import calc_mda8 from pyaerocom.exceptions import TemporalResolutionError logger = logging.getLogger(__name__) @@ -151,6 +152,10 @@ def process_coldata(self, coldata: ColocatedData): else: obs_var = model_var = "UNDEFINED" + if use_fairmode: + if "conco3" in coldata.data.variable: + coldata = calc_mda8(coldata, "conco3mda8", "conco3mda8") + model_name = coldata.model_name obs_name = coldata.obs_name diff --git a/pyaerocom/aeroval/mda8.py b/pyaerocom/aeroval/mda8.py new file mode 100644 index 000000000..d5c11727a --- /dev/null +++ b/pyaerocom/aeroval/mda8.py @@ -0,0 +1,23 @@ +import logging + +import xarray as xr + +from pyaerocom.colocation.colocated_data import ColocatedData + +logger = logging.getLogger(__name__) + + +def calc_mda8(coldata: ColocatedData, /, invar, outvar): + data = coldata.data + ravg = data.rolling(time=8, min_periods=6).mean() + + daily_max = ravg.resample(time="1D").max() + + if isinstance(data, xr.DataArray): + data = data.to_dataset(name=invar) + + data[outvar] = daily_max + + coldata.data = data.to_dataarray() + + return coldata diff --git a/tests/aeroval/test_mda8.py b/tests/aeroval/test_mda8.py new file mode 100644 index 000000000..90f02be89 --- /dev/null +++ b/tests/aeroval/test_mda8.py @@ -0,0 +1,17 @@ +import logging + +import pytest + +from pyaerocom.aeroval.mda8 import calc_mda8 +from pyaerocom.colocation.colocated_data import ColocatedData +from tests.fixtures.collocated_data import EXAMPLE_FILE + +logger = logging.getLogger(__name__) + + +def test_calc_mda8(): + coldata = ColocatedData(EXAMPLE_FILE) + + coldata2 = calc_mda8(coldata, "od550aer", "od550aer2") + + pass From 11cd372e4574faa3778f7d37ed201889b4bbfb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 24 Jun 2024 08:14:19 +0000 Subject: [PATCH 02/22] MDA8 implementation --- pyaerocom/mda8/__init__.py | 0 pyaerocom/mda8/mda8.py | 32 ++++++++++++++++++++++ tests/mda8/__init__.py | 0 tests/mda8/test_mda8.py | 55 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 pyaerocom/mda8/__init__.py create mode 100644 pyaerocom/mda8/mda8.py create mode 100644 tests/mda8/__init__.py create mode 100644 tests/mda8/test_mda8.py diff --git a/pyaerocom/mda8/__init__.py b/pyaerocom/mda8/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py new file mode 100644 index 000000000..8a929f9a6 --- /dev/null +++ b/pyaerocom/mda8/mda8.py @@ -0,0 +1,32 @@ +import numpy as np +import xarray as xr + + +def calc_mda8(data: xr.DataArray) -> float: + def min_periods_max(x: np.ndarray, /, min_periods=1): + """Calculates the max of a 1-dimensional array, returning + nan if not a minimum set of valid values exist. + + :param x: 1-dimensional ndarray. + :param min_periods: minimum required non-nan values, defaults to 1 + :return: A single value, which is either nan or a float. + """ + if not x.ndim == 1: + raise ValueError(f"Unexpected number of dimensions. Got {x.ndim}, expected 1.") + + mask = ~np.isnan(x) + length = np.sum(mask) + if length >= min_periods: + mx = np.nanmax(x) + return mx + + else: + return np.nan + + ravg = data.rolling(time=8, min_periods=6).mean() + + daily_max = ravg.resample(time="1D").reduce( + lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=16) + ) + + return daily_max diff --git a/tests/mda8/__init__.py b/tests/mda8/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py new file mode 100644 index 000000000..dddd70292 --- /dev/null +++ b/tests/mda8/test_mda8.py @@ -0,0 +1,55 @@ +import math +from math import nan + +import numpy as np +import pytest +import xarray as xr + +from pyaerocom.mda8.mda8 import calc_mda8 + + +@pytest.fixture +def example_data() -> xr.DataArray: + arr = xr.DataArray( + [ + [ + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + [0], + ], + ], + dims=["data_source", "time", "station_name"], + coords={"time": xr.date_range(start="2024-01-01", end="2024-01-02", freq="1h")}, + ) + arr.attrs["ts_type"] = "hourly" + return arr + + +def test_calc_mda8(example_data): + mda8 = calc_mda8(example_data) + + assert mda8.shape == (1, 2, 1) + assert mda8.values[0][0][0] == 0 + assert math.isnan(mda8.values[0][1][0]) From d8245bdfd63d24641ba83e84c4bc01fde651948a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 24 Jun 2024 11:25:03 +0000 Subject: [PATCH 03/22] Small fix --- pyaerocom/mda8/mda8.py | 16 +++++++- tests/mda8/test_mda8.py | 81 ++++++++++++++++++++--------------------- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 8a929f9a6..ad0f32644 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -2,8 +2,17 @@ import xarray as xr -def calc_mda8(data: xr.DataArray) -> float: - def min_periods_max(x: np.ndarray, /, min_periods=1): +def calc_mda8(data: xr.DataArray) -> xr.DataArray: + """Calculates the daily max 8h average for an array: + + :param data: The DataArray for which to calculate the mda8. Input + should be a DataArray with dimensions ["data_source", "time", "station_name"] + (ie. the format of ColocatedData.data) representing hourly data. + :return: Equivalently structured DataArray, resampled along the "time" + dimension. + """ + + def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: """Calculates the max of a 1-dimensional array, returning nan if not a minimum set of valid values exist. @@ -23,6 +32,9 @@ def min_periods_max(x: np.ndarray, /, min_periods=1): else: return np.nan + # TODO: This function should check some underlying assumptions such as the dimensions, + # that the data represents hourly data and so on. + ravg = data.rolling(time=8, min_periods=6).mean() daily_max = ravg.resample(time="1D").reduce( diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index dddd70292..ceff13247 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -1,6 +1,3 @@ -import math -from math import nan - import numpy as np import pytest import xarray as xr @@ -8,48 +5,48 @@ from pyaerocom.mda8.mda8 import calc_mda8 -@pytest.fixture -def example_data() -> xr.DataArray: +@pytest.mark.parametrize( + "time,values,exp_length,exp_mda8", + ( + pytest.param( + xr.date_range(start="2024-01-01", periods=49, freq="1h"), + [0] * 49, + 3, + [0, 0, np.nan], + id="zeros", + ), + pytest.param( + xr.date_range(start="2024-01-01", periods=49, freq="1h"), + np.linspace(start=1, stop=49, num=49), + 3, + [20.5, 44.5, np.nan], + id="incrementing-by-1", + ), + pytest.param( + xr.date_range(start="2024-01-01 12:00:00", periods=49, freq="1h"), + np.concatenate( # Sequence 1-7, 3x nan, 11-14, 3x nan, 28-49 + ( + np.arange(start=1, stop=8), + [np.nan] * 3, + np.arange(start=11, stop=25), + [np.nan] * 3, + np.arange(start=28, stop=50), + ) + ), + 3, + [np.nan] * 3, + ), + ), +) +def test_calc_mda8(time, values, exp_length, exp_mda8): arr = xr.DataArray( - [ - [ - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - [0], - ], - ], + [[[x] for x in values]], dims=["data_source", "time", "station_name"], - coords={"time": xr.date_range(start="2024-01-01", end="2024-01-02", freq="1h")}, + coords={"time": time}, ) - arr.attrs["ts_type"] = "hourly" - return arr + mda8 = calc_mda8(arr) -def test_calc_mda8(example_data): - mda8 = calc_mda8(example_data) + assert mda8.shape[1] == exp_length - assert mda8.shape == (1, 2, 1) - assert mda8.values[0][0][0] == 0 - assert math.isnan(mda8.values[0][1][0]) + np.testing.assert_array_equal(mda8[0, :, 0], exp_mda8) From c2954db1a4682b85ce60d99a32bf5ccac31e0b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 24 Jun 2024 11:29:59 +0000 Subject: [PATCH 04/22] Remove old files --- pyaerocom/aeroval/mda8.py | 23 ----------------------- tests/aeroval/test_mda8.py | 17 ----------------- 2 files changed, 40 deletions(-) delete mode 100644 pyaerocom/aeroval/mda8.py delete mode 100644 tests/aeroval/test_mda8.py diff --git a/pyaerocom/aeroval/mda8.py b/pyaerocom/aeroval/mda8.py deleted file mode 100644 index d5c11727a..000000000 --- a/pyaerocom/aeroval/mda8.py +++ /dev/null @@ -1,23 +0,0 @@ -import logging - -import xarray as xr - -from pyaerocom.colocation.colocated_data import ColocatedData - -logger = logging.getLogger(__name__) - - -def calc_mda8(coldata: ColocatedData, /, invar, outvar): - data = coldata.data - ravg = data.rolling(time=8, min_periods=6).mean() - - daily_max = ravg.resample(time="1D").max() - - if isinstance(data, xr.DataArray): - data = data.to_dataset(name=invar) - - data[outvar] = daily_max - - coldata.data = data.to_dataarray() - - return coldata diff --git a/tests/aeroval/test_mda8.py b/tests/aeroval/test_mda8.py deleted file mode 100644 index 90f02be89..000000000 --- a/tests/aeroval/test_mda8.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -import pytest - -from pyaerocom.aeroval.mda8 import calc_mda8 -from pyaerocom.colocation.colocated_data import ColocatedData -from tests.fixtures.collocated_data import EXAMPLE_FILE - -logger = logging.getLogger(__name__) - - -def test_calc_mda8(): - coldata = ColocatedData(EXAMPLE_FILE) - - coldata2 = calc_mda8(coldata, "od550aer", "od550aer2") - - pass From e08e3ec5b4a927c446a27716de43b7ba32ebdc9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 24 Jun 2024 11:41:50 +0000 Subject: [PATCH 05/22] Cleanup --- pyaerocom/aeroval/coldatatojson_engine.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyaerocom/aeroval/coldatatojson_engine.py b/pyaerocom/aeroval/coldatatojson_engine.py index b19c5b3c2..67cb22933 100644 --- a/pyaerocom/aeroval/coldatatojson_engine.py +++ b/pyaerocom/aeroval/coldatatojson_engine.py @@ -32,7 +32,6 @@ ) from pyaerocom.aeroval.exceptions import ConfigError from pyaerocom.aeroval.json_utils import write_json -from pyaerocom.aeroval.mda8 import calc_mda8 from pyaerocom.exceptions import TemporalResolutionError logger = logging.getLogger(__name__) @@ -152,10 +151,6 @@ def process_coldata(self, coldata: ColocatedData): else: obs_var = model_var = "UNDEFINED" - if use_fairmode: - if "conco3" in coldata.data.variable: - coldata = calc_mda8(coldata, "conco3mda8", "conco3mda8") - model_name = coldata.model_name obs_name = coldata.obs_name From 667251677d1dcc4bca62336dbe9d79b65dd8d864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 25 Jun 2024 07:47:42 +0000 Subject: [PATCH 06/22] test: Additional test for mda8 calculation --- .gitignore | 3 +++ pyaerocom/mda8/mda8.py | 14 ++++++++++++++ pytest.ini | 3 +++ tests/mda8/test_mda8.py | 31 +++++++++++++++++++++++++------ 4 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 pytest.ini diff --git a/.gitignore b/.gitignore index 1df5ec4f8..ee3463cc9 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,6 @@ sorted_out # Vim temp files *.swp + + +pytest.ini \ No newline at end of file diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index ad0f32644..5c56d9acb 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -1,6 +1,20 @@ import numpy as np import xarray as xr +from pyaerocom.colocation.colocated_data import ColocatedData + + +def mda8_colocated_data(coldat: ColocatedData) -> ColocatedData: + """Applies the mda8 calculation to a colocated data object, + returning the new colocated data object. + + :param data: The colocated data object. + :return: Colocated data object containing + """ + + new_data = ColocatedData(calc_mda8(coldat)) + return new_data + def calc_mda8(data: xr.DataArray) -> xr.DataArray: """Calculates the daily max 8h average for an array: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..20ce9e6dc --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +log_cli = True +log_cli_level = DEBUG \ No newline at end of file diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index ceff13247..8c3a099c5 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -6,19 +6,17 @@ @pytest.mark.parametrize( - "time,values,exp_length,exp_mda8", + "time,values,exp_mda8", ( pytest.param( xr.date_range(start="2024-01-01", periods=49, freq="1h"), [0] * 49, - 3, [0, 0, np.nan], id="zeros", ), pytest.param( xr.date_range(start="2024-01-01", periods=49, freq="1h"), np.linspace(start=1, stop=49, num=49), - 3, [20.5, 44.5, np.nan], id="incrementing-by-1", ), @@ -33,12 +31,12 @@ np.arange(start=28, stop=50), ) ), - 3, [np.nan] * 3, + id="with-nans", ), ), ) -def test_calc_mda8(time, values, exp_length, exp_mda8): +def test_calc_mda8(time, values, exp_mda8): arr = xr.DataArray( [[[x] for x in values]], dims=["data_source", "time", "station_name"], @@ -47,6 +45,27 @@ def test_calc_mda8(time, values, exp_length, exp_mda8): mda8 = calc_mda8(arr) - assert mda8.shape[1] == exp_length + assert mda8.shape[1] == len(exp_mda8) np.testing.assert_array_equal(mda8[0, :, 0], exp_mda8) + + +def test_calc_mda8_with_gap(): + arr1 = xr.DataArray( + [[[x] for x in np.linspace(start=1, stop=50, num=50)]], + dims=["data_source", "time", "station_name"], + coords={"time": xr.date_range(start="2024-01-01", periods=50, freq="1h")}, + ) + + arr2 = xr.DataArray( + [[[x] for x in np.linspace(start=1, stop=50, num=50)]], + dims=["data_source", "time", "station_name"], + coords={"time": xr.date_range(start="2024-01-04", periods=50, freq="1h")}, + ) + + arr = xr.concat((arr1, arr2), dim="time") + + mda8 = calc_mda8(arr) + + assert mda8.shape == (1, 6, 1) + np.testing.assert_array_equal(mda8[0, :, 0], [20.5, 44.5, np.nan, 41.25, 44.5, np.nan]) From 2824961cda4d3447fc91d8542adb3d956e4185bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 25 Jun 2024 09:04:29 +0000 Subject: [PATCH 07/22] WIP --- pyaerocom/colocation/colocator.py | 12 ++++++++---- pyaerocom/mda8/mda8.py | 9 +++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 8b97d5d4c..0828ba3b3 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -7,6 +7,7 @@ import os import traceback import warnings +from collections import defaultdict from datetime import datetime from typing import Any, Callable @@ -30,6 +31,7 @@ from pyaerocom.io import ReadCAMS2_83, ReadGridded, ReadUngridded from pyaerocom.io.helpers import get_all_supported_ids_ungridded from pyaerocom.io.mscw_ctm.reader import ReadMscwCtm +from pyaerocom.mda8.mda8 import mda8_colocated_data from .colocated_data import ColocatedData from .colocation_3d import ColocatedDataLists, colocate_vertical_profile_gridded @@ -360,7 +362,7 @@ def run(self, var_list: list = None): dictionaries comprising key / value pairs of obs variables and associated instances of :class:`ColocatedData`. """ - data_out = {} + data_out = defaultdict(lambda: dict()) # ToDo: see if the following could be solved via custom context manager try: vars_to_process = self.prepare_run(var_list) @@ -378,9 +380,11 @@ def run(self, var_list: list = None): coldata = self._run_helper( mod_var, obs_var ) # note this can be ColocatedData or ColocatedDataLists - if mod_var not in data_out: - data_out[mod_var] = {} data_out[mod_var][obs_var] = coldata + + if isinstance(coldata, ColocatedData): + data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8_colocated_data(coldata) + self._processing_status.append([mod_var, obs_var, 1]) except Exception: msg = f"Failed to perform analysis: {traceback.format_exc()}\n" @@ -397,7 +401,7 @@ def run(self, var_list: list = None): self._print_processing_status() if self.colocation_setup.keep_data: self.data = data_out - return data_out + return dict(data_out) def get_nc_files_in_coldatadir(self): """ diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 5c56d9acb..37baa1de7 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -2,17 +2,22 @@ import xarray as xr from pyaerocom.colocation.colocated_data import ColocatedData +from pyaerocom.colocation.colocation_3d import ColocatedDataLists -def mda8_colocated_data(coldat: ColocatedData) -> ColocatedData: +def mda8_colocated_data( + coldat: ColocatedData | ColocatedDataLists, +) -> ColocatedData | ColocatedDataLists: """Applies the mda8 calculation to a colocated data object, returning the new colocated data object. :param data: The colocated data object. :return: Colocated data object containing """ + if isinstance(coldat, ColocatedDataLists): + raise NotImplementedError("ColocatedDataLists not currently supported.") - new_data = ColocatedData(calc_mda8(coldat)) + new_data = ColocatedData(calc_mda8(coldat.data)) return new_data From 951e204c01d9214c28fb363543d1555bbf191bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 25 Jun 2024 09:24:28 +0000 Subject: [PATCH 08/22] Remove pytest.ini --- pyaerocom/colocation/colocator.py | 1 + pytest.ini | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 pytest.ini diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 0828ba3b3..4f366ecdb 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -382,6 +382,7 @@ def run(self, var_list: list = None): ) # note this can be ColocatedData or ColocatedDataLists data_out[mod_var][obs_var] = coldata + # TODO: Currently applies mda8 to every variable. Should only apply to the ones we care about. if isinstance(coldata, ColocatedData): data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8_colocated_data(coldata) diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 20ce9e6dc..000000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -log_cli = True -log_cli_level = DEBUG \ No newline at end of file From 1475919f2256880919a1cf442ebb746b90b08e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Wed, 26 Jun 2024 08:33:35 +0000 Subject: [PATCH 09/22] Mostly validation --- pyaerocom/colocation/colocator.py | 6 +-- pyaerocom/mda8/mda8.py | 73 +++++++++++++++++++------------ tests/mda8/test_mda8.py | 6 +-- 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 4f366ecdb..c77b4674d 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -382,9 +382,9 @@ def run(self, var_list: list = None): ) # note this can be ColocatedData or ColocatedDataLists data_out[mod_var][obs_var] = coldata - # TODO: Currently applies mda8 to every variable. Should only apply to the ones we care about. - if isinstance(coldata, ColocatedData): - data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8_colocated_data(coldata) + if obs_var in ["conco3", "vmro3"]: + if isinstance(coldata, ColocatedData): + data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8_colocated_data(coldata) self._processing_status.append([mod_var, obs_var, 1]) except Exception: diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 37baa1de7..e442c6aad 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -5,6 +5,24 @@ from pyaerocom.colocation.colocation_3d import ColocatedDataLists +def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: + """Calculates the max of a 1-dimensional array, returning + nan if not a minimum count of valid values exist. + + :param x: 1-dimensional ndarray. + :param min_periods: minimum required non-nan values, defaults to 1 + :return: A single value, which is either nan or a float. + """ + if x.ndim != 1: + raise ValueError(f"Unexpected number of dimensions. Got {x.ndim}, expected 1.") + + length = np.sum(~np.isnan(x)) + if length < min_periods: + return np.nan + + return np.nanmax(x) + + def mda8_colocated_data( coldat: ColocatedData | ColocatedDataLists, ) -> ColocatedData | ColocatedDataLists: @@ -14,14 +32,35 @@ def mda8_colocated_data( :param data: The colocated data object. :return: Colocated data object containing """ - if isinstance(coldat, ColocatedDataLists): - raise NotImplementedError("ColocatedDataLists not currently supported.") + if not isinstance(coldat, ColocatedDataLists | ColocatedData): + raise ValueError( + f"Unexpected type {type(coldat)}. Expected ColocatedData or ColocatedDataLists" + ) + + if isinstance(coldat, ColocatedData): + if coldat.ts_type != "hourly": + raise ValueError(f"Expected hourly timeseries. Got {coldat.ts_type}.") + + # TODO: Currently order of dims matter in the implementation, so this check is + # stricter than it probably should be. + if coldat.dims != ["data_source", "time", "station_name"]: + raise ValueError( + f"Unexpected dimensions. Got {coldat.dims}, expected ['data_source', 'time', 'station_name']." + ) - new_data = ColocatedData(calc_mda8(coldat.data)) - return new_data + return ColocatedData(_calc_mda8(coldat.data)) + colocateddata_for_statistics = [] + colocateddata_for_profile_viz = [] + for cd in coldat.colocateddata_for_statistics: + colocateddata_for_statistics.append(mda8_colocated_data(cd)) + for cd in coldat.colocateddata_for_profile_viz: + colocateddata_for_profile_viz.append(mda8_colocated_data(cd)) -def calc_mda8(data: xr.DataArray) -> xr.DataArray: + return ColocatedDataLists(colocateddata_for_statistics, colocateddata_for_profile_viz) + + +def _calc_mda8(data: xr.DataArray) -> xr.DataArray: """Calculates the daily max 8h average for an array: :param data: The DataArray for which to calculate the mda8. Input @@ -30,30 +69,6 @@ def calc_mda8(data: xr.DataArray) -> xr.DataArray: :return: Equivalently structured DataArray, resampled along the "time" dimension. """ - - def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: - """Calculates the max of a 1-dimensional array, returning - nan if not a minimum set of valid values exist. - - :param x: 1-dimensional ndarray. - :param min_periods: minimum required non-nan values, defaults to 1 - :return: A single value, which is either nan or a float. - """ - if not x.ndim == 1: - raise ValueError(f"Unexpected number of dimensions. Got {x.ndim}, expected 1.") - - mask = ~np.isnan(x) - length = np.sum(mask) - if length >= min_periods: - mx = np.nanmax(x) - return mx - - else: - return np.nan - - # TODO: This function should check some underlying assumptions such as the dimensions, - # that the data represents hourly data and so on. - ravg = data.rolling(time=8, min_periods=6).mean() daily_max = ravg.resample(time="1D").reduce( diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index 8c3a099c5..80d1dd11d 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -2,7 +2,7 @@ import pytest import xarray as xr -from pyaerocom.mda8.mda8 import calc_mda8 +from pyaerocom.mda8.mda8 import _calc_mda8 @pytest.mark.parametrize( @@ -43,7 +43,7 @@ def test_calc_mda8(time, values, exp_mda8): coords={"time": time}, ) - mda8 = calc_mda8(arr) + mda8 = _calc_mda8(arr) assert mda8.shape[1] == len(exp_mda8) @@ -65,7 +65,7 @@ def test_calc_mda8_with_gap(): arr = xr.concat((arr1, arr2), dim="time") - mda8 = calc_mda8(arr) + mda8 = _calc_mda8(arr) assert mda8.shape == (1, 6, 1) np.testing.assert_array_equal(mda8[0, :, 0], [20.5, 44.5, np.nan, 41.25, 44.5, np.nan]) From 644e615b2fd5c2926dc1bd6e63a3572cac291f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Wed, 26 Jun 2024 12:18:44 +0000 Subject: [PATCH 10/22] mda8 should now include information about tstype and variable name --- pyaerocom/colocation/colocator.py | 13 +++++++++++-- pyaerocom/mda8/mda8.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index c77b4674d..711ce81ab 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -383,8 +383,17 @@ def run(self, var_list: list = None): data_out[mod_var][obs_var] = coldata if obs_var in ["conco3", "vmro3"]: - if isinstance(coldata, ColocatedData): - data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8_colocated_data(coldata) + mda8 = None + try: + mda8 = mda8_colocated_data( + coldata, obs_var=f"{obs_var}mda8", mod_var=f"{mod_var}mda8" + ) + except ValueError: + logger.warning( + "Tried calculating mda8 for [{obs_var}, {mod_var}], but failed." + ) + finally: + data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8 self._processing_status.append([mod_var, obs_var, 1]) except Exception: diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index e442c6aad..8947c6f58 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -24,7 +24,7 @@ def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: def mda8_colocated_data( - coldat: ColocatedData | ColocatedDataLists, + coldat: ColocatedData | ColocatedDataLists, /, obs_var: str, mod_var: str ) -> ColocatedData | ColocatedDataLists: """Applies the mda8 calculation to a colocated data object, returning the new colocated data object. @@ -48,14 +48,21 @@ def mda8_colocated_data( f"Unexpected dimensions. Got {coldat.dims}, expected ['data_source', 'time', 'station_name']." ) - return ColocatedData(_calc_mda8(coldat.data)) + cd = ColocatedData(_calc_mda8(coldat.data)) + cd.ts_type = "daily" + cd.var_name = [obs_var, mod_var] + return colocateddata_for_statistics = [] colocateddata_for_profile_viz = [] for cd in coldat.colocateddata_for_statistics: - colocateddata_for_statistics.append(mda8_colocated_data(cd)) + colocateddata_for_statistics.append( + mda8_colocated_data(cd, obs_var=obs_var, mod_var=mod_var) + ) for cd in coldat.colocateddata_for_profile_viz: - colocateddata_for_profile_viz.append(mda8_colocated_data(cd)) + colocateddata_for_profile_viz.append( + mda8_colocated_data(cd, obs_var=obs_var, mod_var=mod_var) + ) return ColocatedDataLists(colocateddata_for_statistics, colocateddata_for_profile_viz) From ad73586a907c4e7fa5436ad690b146882d4e795c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 1 Jul 2024 07:56:25 +0000 Subject: [PATCH 11/22] Test on colocated data fixture --- pyaerocom/mda8/mda8.py | 9 +++++---- tests/mda8/test_mda8.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 8947c6f58..e80b396c6 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -43,15 +43,14 @@ def mda8_colocated_data( # TODO: Currently order of dims matter in the implementation, so this check is # stricter than it probably should be. - if coldat.dims != ["data_source", "time", "station_name"]: + if coldat.dims != ("data_source", "time", "station_name"): raise ValueError( f"Unexpected dimensions. Got {coldat.dims}, expected ['data_source', 'time', 'station_name']." ) cd = ColocatedData(_calc_mda8(coldat.data)) - cd.ts_type = "daily" - cd.var_name = [obs_var, mod_var] - return + cd.data.attrs["var_name"] = [obs_var, mod_var] + return cd colocateddata_for_statistics = [] colocateddata_for_profile_viz = [] @@ -82,4 +81,6 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray: lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=16) ) + daily_max.attrs["ts_type"] = "daily" + return daily_max diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index 80d1dd11d..4dd6886b2 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -2,7 +2,9 @@ import pytest import xarray as xr -from pyaerocom.mda8.mda8 import _calc_mda8 +from pyaerocom.colocation.colocated_data import ColocatedData +from pyaerocom.mda8.mda8 import _calc_mda8, mda8_colocated_data +from tests.fixtures.collocated_data import coldata @pytest.mark.parametrize( @@ -69,3 +71,13 @@ def test_calc_mda8_with_gap(): assert mda8.shape == (1, 6, 1) np.testing.assert_array_equal(mda8[0, :, 0], [20.5, 44.5, np.nan, 41.25, 44.5, np.nan]) + + +@pytest.mark.parametrize("coldataset", ("fake_3d_hr",)) +def test_coldata_to_mda8(coldata): + mda8 = mda8_colocated_data(coldata, obs_var="vmro3mda8", mod_var="vmro3mda8") + + assert isinstance(mda8, ColocatedData) + assert mda8.metadata["ts_type"] == "daily" + assert mda8.metadata["var_name"] == ["vmro3mda8", "vmro3mda8"] + assert mda8.shape == (2, 8, 1) From a485162a4976408d8f65473f6df749cd9ff86b89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 1 Jul 2024 08:09:17 +0000 Subject: [PATCH 12/22] Test values --- tests/mda8/test_mda8.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index 4dd6886b2..c2dfa1f7a 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -81,3 +81,23 @@ def test_coldata_to_mda8(coldata): assert mda8.metadata["ts_type"] == "daily" assert mda8.metadata["var_name"] == ["vmro3mda8", "vmro3mda8"] assert mda8.shape == (2, 8, 1) + + np.testing.assert_array_almost_equal( + mda8.data.values[0, :, 0], + [np.nan, np.nan, 1.18741556, 1.18777241, 1.18869106, 1.18879322, 1.18807846, 1.18700801], + decimal=5, + ) + np.testing.assert_array_almost_equal( + mda8.data.values[1, :, 0], + [ + 1.57327333, + 1.28884431, + 1.28741556, + 1.28777241, + 1.28869106, + 1.28879322, + 1.28807846, + 1.28700801, + ], + decimal=5, + ) From f3b3d32fe4a8c80a4001a8270d85fd28437c57cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 1 Jul 2024 08:52:06 +0000 Subject: [PATCH 13/22] Test ColocatedDataLists --- tests/mda8/test_mda8.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index c2dfa1f7a..f664de378 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -3,6 +3,7 @@ import xarray as xr from pyaerocom.colocation.colocated_data import ColocatedData +from pyaerocom.colocation.colocation_3d import ColocatedDataLists from pyaerocom.mda8.mda8 import _calc_mda8, mda8_colocated_data from tests.fixtures.collocated_data import coldata @@ -101,3 +102,15 @@ def test_coldata_to_mda8(coldata): ], decimal=5, ) + + +@pytest.mark.parametrize("coldataset", ("fake_3d_hr",)) +def test_coldata_to_mda8_ColocatedDataLists(coldata): + cd_list = ColocatedDataLists([coldata], []) + + mda8 = mda8_colocated_data(cd_list, obs_var="vmro3mda8", mod_var="vmro3mda8") + + assert isinstance(mda8, ColocatedDataLists) + + assert len(mda8.colocateddata_for_statistics) == 1 + assert len(mda8.colocateddata_for_profile_viz) == 0 From 1e0e16d84f0c89149a231c003d1aa3e7bba84c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 1 Jul 2024 12:09:46 +0000 Subject: [PATCH 14/22] 16->18 observations required for mda8 calculation --- pyaerocom/mda8/mda8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index e80b396c6..7c9b9c4e0 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -78,7 +78,7 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray: ravg = data.rolling(time=8, min_periods=6).mean() daily_max = ravg.resample(time="1D").reduce( - lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=16) + lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) ) daily_max.attrs["ts_type"] = "daily" From 6aba610dee3dcefc90fbd1a7faefaa2845b56203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Mon, 1 Jul 2024 14:50:04 +0000 Subject: [PATCH 15/22] Code review 2 --- pyaerocom/colocation/colocator.py | 2 +- pyaerocom/mda8/mda8.py | 63 +++++++++++++++---------------- tests/mda8/test_mda8.py | 22 +++-------- 3 files changed, 36 insertions(+), 51 deletions(-) diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 711ce81ab..83e20fb0e 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -390,7 +390,7 @@ def run(self, var_list: list = None): ) except ValueError: logger.warning( - "Tried calculating mda8 for [{obs_var}, {mod_var}], but failed." + "Tried calculating mda8 for [%s, %s], but failed.", obs_var, mod_var ) finally: data_out[f"{mod_var}mda8"][f"{obs_var}mda8"] = mda8 diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 7c9b9c4e0..84e89fd1c 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -2,7 +2,6 @@ import xarray as xr from pyaerocom.colocation.colocated_data import ColocatedData -from pyaerocom.colocation.colocation_3d import ColocatedDataLists def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: @@ -23,47 +22,29 @@ def min_periods_max(x: np.ndarray, /, min_periods=1) -> float: return np.nanmax(x) -def mda8_colocated_data( - coldat: ColocatedData | ColocatedDataLists, /, obs_var: str, mod_var: str -) -> ColocatedData | ColocatedDataLists: +def mda8_colocated_data(coldat: ColocatedData, /, obs_var: str, mod_var: str) -> ColocatedData: """Applies the mda8 calculation to a colocated data object, returning the new colocated data object. :param data: The colocated data object. :return: Colocated data object containing """ - if not isinstance(coldat, ColocatedDataLists | ColocatedData): - raise ValueError( - f"Unexpected type {type(coldat)}. Expected ColocatedData or ColocatedDataLists" - ) + if not isinstance(coldat, ColocatedData): + raise ValueError(f"Unexpected type {type(coldat)}. Expected ColocatedData") - if isinstance(coldat, ColocatedData): - if coldat.ts_type != "hourly": - raise ValueError(f"Expected hourly timeseries. Got {coldat.ts_type}.") - - # TODO: Currently order of dims matter in the implementation, so this check is - # stricter than it probably should be. - if coldat.dims != ("data_source", "time", "station_name"): - raise ValueError( - f"Unexpected dimensions. Got {coldat.dims}, expected ['data_source', 'time', 'station_name']." - ) - - cd = ColocatedData(_calc_mda8(coldat.data)) - cd.data.attrs["var_name"] = [obs_var, mod_var] - return cd - - colocateddata_for_statistics = [] - colocateddata_for_profile_viz = [] - for cd in coldat.colocateddata_for_statistics: - colocateddata_for_statistics.append( - mda8_colocated_data(cd, obs_var=obs_var, mod_var=mod_var) - ) - for cd in coldat.colocateddata_for_profile_viz: - colocateddata_for_profile_viz.append( - mda8_colocated_data(cd, obs_var=obs_var, mod_var=mod_var) + if coldat.ts_type != "hourly": + raise ValueError(f"Expected hourly timeseries. Got {coldat.ts_type}.") + + # TODO: Currently order of dims matter in the implementation, so this check is + # stricter than it probably should be. + if coldat.dims != ("data_source", "time", "station_name"): + raise ValueError( + f"Unexpected dimensions. Got {coldat.dims}, expected ['data_source', 'time', 'station_name']." ) - return ColocatedDataLists(colocateddata_for_statistics, colocateddata_for_profile_viz) + cd = ColocatedData(_calc_mda8(coldat.data)) + cd.data.attrs["var_name"] = [obs_var, mod_var] + return cd def _calc_mda8(data: xr.DataArray) -> xr.DataArray: @@ -74,6 +55,22 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray: (ie. the format of ColocatedData.data) representing hourly data. :return: Equivalently structured DataArray, resampled along the "time" dimension. + + Note: + ----- + The calculation for mda8 is defined as follows: + > Eight hours values: 75 % of values (i.e. 6 hours) + + > Maximum daily 8-hour mean: 75 % of the hourly running eight hour + averages (i.e. 18 eight hour averages per + day) + + > The maximum daily eight hour mean concentration will be selected by examining + > eight hour running averages, calculated from hourly data and updated each hour. + > Each eight hour average so calculated will be assigned to the day on which it + > ends i.e. the first calculation period for any one day will be the period from + > 17:00 on the previous day to 01:00 on that day; the last calculation period for + > any one day will be the period from 16:00 to 24:00 on that day. """ ravg = data.rolling(time=8, min_periods=6).mean() diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index f664de378..4c57f425b 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -71,7 +71,7 @@ def test_calc_mda8_with_gap(): mda8 = _calc_mda8(arr) assert mda8.shape == (1, 6, 1) - np.testing.assert_array_equal(mda8[0, :, 0], [20.5, 44.5, np.nan, 41.25, 44.5, np.nan]) + pytest.approx(mda8[0, :, 0], [20.5, 44.5, np.nan, 41.25, 44.5, np.nan], abs=10 * 10**-5) @pytest.mark.parametrize("coldataset", ("fake_3d_hr",)) @@ -83,12 +83,12 @@ def test_coldata_to_mda8(coldata): assert mda8.metadata["var_name"] == ["vmro3mda8", "vmro3mda8"] assert mda8.shape == (2, 8, 1) - np.testing.assert_array_almost_equal( + pytest.approx( mda8.data.values[0, :, 0], [np.nan, np.nan, 1.18741556, 1.18777241, 1.18869106, 1.18879322, 1.18807846, 1.18700801], - decimal=5, + abs=10 * 10**-5, ) - np.testing.assert_array_almost_equal( + pytest.approx( mda8.data.values[1, :, 0], [ 1.57327333, @@ -100,17 +100,5 @@ def test_coldata_to_mda8(coldata): 1.28807846, 1.28700801, ], - decimal=5, + abs=10 * 10**-5, ) - - -@pytest.mark.parametrize("coldataset", ("fake_3d_hr",)) -def test_coldata_to_mda8_ColocatedDataLists(coldata): - cd_list = ColocatedDataLists([coldata], []) - - mda8 = mda8_colocated_data(cd_list, obs_var="vmro3mda8", mod_var="vmro3mda8") - - assert isinstance(mda8, ColocatedDataLists) - - assert len(mda8.colocateddata_for_statistics) == 1 - assert len(mda8.colocateddata_for_profile_viz) == 0 From b06320d2bbe4a662c061c94b9eb9e6b2ab63ff73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 08:25:51 +0000 Subject: [PATCH 16/22] Add Gauss trick tests --- pyaerocom/mda8/mda8.py | 16 ++++--- tests/mda8/test_mda8.py | 94 ++++++++++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 26 deletions(-) diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/mda8/mda8.py index 84e89fd1c..e3f9aee36 100644 --- a/pyaerocom/mda8/mda8.py +++ b/pyaerocom/mda8/mda8.py @@ -72,12 +72,16 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray: > 17:00 on the previous day to 01:00 on that day; the last calculation period for > any one day will be the period from 16:00 to 24:00 on that day. """ - ravg = data.rolling(time=8, min_periods=6).mean() + mda8 = _daily_max(_rolling_average_8hr(data)) + mda8.attrs["ts_type"] = "daily" + return mda8 - daily_max = ravg.resample(time="1D").reduce( - lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) - ) - daily_max.attrs["ts_type"] = "daily" +def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: + return arr.rolling(time=8, min_periods=6).mean() + - return daily_max +def _daily_max(arr: xr.DataArray) -> xr.DataArray: + return arr.resample(time="24h", offset="1h").reduce( + lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) + ) diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index 4c57f425b..7a6a7d474 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -4,27 +4,36 @@ from pyaerocom.colocation.colocated_data import ColocatedData from pyaerocom.colocation.colocation_3d import ColocatedDataLists -from pyaerocom.mda8.mda8 import _calc_mda8, mda8_colocated_data +from pyaerocom.mda8.mda8 import _calc_mda8, _daily_max, _rolling_average_8hr, mda8_colocated_data from tests.fixtures.collocated_data import coldata +@pytest.fixture +def test_data(time, values) -> xr.DataArray: + return xr.DataArray( + [[[x] for x in values]], + dims=["data_source", "time", "station_name"], + coords={"time": time}, + ) + + @pytest.mark.parametrize( "time,values,exp_mda8", ( pytest.param( - xr.date_range(start="2024-01-01", periods=49, freq="1h"), + xr.date_range(start="2024-01-01 01:00", periods=49, freq="1h"), [0] * 49, [0, 0, np.nan], id="zeros", ), pytest.param( - xr.date_range(start="2024-01-01", periods=49, freq="1h"), + xr.date_range(start="2024-01-01 01:00", periods=49, freq="1h"), np.linspace(start=1, stop=49, num=49), [20.5, 44.5, np.nan], id="incrementing-by-1", ), pytest.param( - xr.date_range(start="2024-01-01 12:00:00", periods=49, freq="1h"), + xr.date_range(start="2024-01-01 13:00:00", periods=49, freq="1h"), np.concatenate( # Sequence 1-7, 3x nan, 11-14, 3x nan, 28-49 ( np.arange(start=1, stop=8), @@ -39,14 +48,8 @@ ), ), ) -def test_calc_mda8(time, values, exp_mda8): - arr = xr.DataArray( - [[[x] for x in values]], - dims=["data_source", "time", "station_name"], - coords={"time": time}, - ) - - mda8 = _calc_mda8(arr) +def test_calc_mda8(test_data, exp_mda8): + mda8 = _calc_mda8(test_data) assert mda8.shape[1] == len(exp_mda8) @@ -57,13 +60,13 @@ def test_calc_mda8_with_gap(): arr1 = xr.DataArray( [[[x] for x in np.linspace(start=1, stop=50, num=50)]], dims=["data_source", "time", "station_name"], - coords={"time": xr.date_range(start="2024-01-01", periods=50, freq="1h")}, + coords={"time": xr.date_range(start="2024-01-01 01:00", periods=50, freq="1h")}, ) arr2 = xr.DataArray( [[[x] for x in np.linspace(start=1, stop=50, num=50)]], dims=["data_source", "time", "station_name"], - coords={"time": xr.date_range(start="2024-01-04", periods=50, freq="1h")}, + coords={"time": xr.date_range(start="2024-01-04 01:00", periods=50, freq="1h")}, ) arr = xr.concat((arr1, arr2), dim="time") @@ -81,16 +84,26 @@ def test_coldata_to_mda8(coldata): assert isinstance(mda8, ColocatedData) assert mda8.metadata["ts_type"] == "daily" assert mda8.metadata["var_name"] == ["vmro3mda8", "vmro3mda8"] - assert mda8.shape == (2, 8, 1) + assert mda8.shape == (2, 9, 1) - pytest.approx( + np.testing.assert_array_almost_equal( mda8.data.values[0, :, 0], - [np.nan, np.nan, 1.18741556, 1.18777241, 1.18869106, 1.18879322, 1.18807846, 1.18700801], - abs=10 * 10**-5, + [ + np.nan, + np.nan, + np.nan, + 1.18741556, + 1.18777241, + 1.18869106, + 1.18879322, + 1.18807846, + 1.18700801, + ], ) - pytest.approx( + np.testing.assert_array_almost_equal( mda8.data.values[1, :, 0], [ + np.nan, 1.57327333, 1.28884431, 1.28741556, @@ -100,5 +113,46 @@ def test_coldata_to_mda8(coldata): 1.28807846, 1.28700801, ], - abs=10 * 10**-5, ) + + +@pytest.mark.parametrize( + "time,values,exp_ravg", + ( + ( + xr.date_range(start="2024-01-01 01:00", periods=48, freq="1h"), + [0, 1, 2, 3, 4, 5, 6, 7] * 6, + [np.nan] * 5 + [2.5, 3] + [3.5] * 41, + ), + ), +) +def test_rollingaverage(test_data, exp_ravg): + ravg = _rolling_average_8hr(test_data) + + np.testing.assert_array_almost_equal(ravg[0, :, 0], exp_ravg) + + +@pytest.mark.parametrize( + "time,values,exp_daily_max", + ( + ( + xr.date_range(start="2024-01-01 01:00", periods=48, freq="1h"), + np.linspace(1, 48, num=48), + [24, 48], + ), + ( + xr.date_range(start="2024-01-01 01:00", periods=24, freq="1h"), + [np.nan] * 6 + list(range(7, 25)), + [24], + ), + ( + xr.date_range(start="2024-01-01 01:00", periods=24, freq="1h"), + [np.nan] * 7 + list(range(8, 25)), + [np.nan], + ), + ), +) +def test_daily_max(test_data, exp_daily_max): + dmax = _daily_max(test_data) + + np.testing.assert_array_equal(dmax[0, :, 0], exp_daily_max) From e9164106043716bf16de91f4892a271cce0cc012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 09:11:44 +0000 Subject: [PATCH 17/22] Add edge cases --- tests/mda8/test_mda8.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/mda8/test_mda8.py b/tests/mda8/test_mda8.py index 7a6a7d474..678e62f40 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/mda8/test_mda8.py @@ -124,6 +124,21 @@ def test_coldata_to_mda8(coldata): [0, 1, 2, 3, 4, 5, 6, 7] * 6, [np.nan] * 5 + [2.5, 3] + [3.5] * 41, ), + ( + xr.date_range(start="2024-01-01 01:00", periods=24, freq="1h"), + [np.nan, 1, 2, 3, 4, 5, 6, 7] * 3, + [np.nan] * 6 + [3.5] + [4] * 17, + ), + ( + xr.date_range(start="2024-01-01 01:00", periods=24, freq="1h"), + [np.nan, np.nan, 2, 3, 4, 5, 6, 7] * 3, + [np.nan] * 7 + [4.5] * 17, + ), + ( + xr.date_range(start="2024-01-01 01:00", periods=24, freq="1h"), + [np.nan, np.nan, np.nan, 3, 4, 5, 6, 7] * 3, + [np.nan] * 24, + ), ), ) def test_rollingaverage(test_data, exp_ravg): From 7d158fa39c591be09da8db90161dd340c1c8425c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 09:32:01 +0000 Subject: [PATCH 18/22] Refactor+pytest.approx --- pyaerocom/colocation/colocator.py | 2 +- pyaerocom/{ => stats}/mda8/__init__.py | 0 pyaerocom/{ => stats}/mda8/mda8.py | 0 tests/{ => stats}/mda8/__init__.py | 0 tests/{ => stats}/mda8/test_mda8.py | 33 +++++++++++++++++++------- 5 files changed, 26 insertions(+), 9 deletions(-) rename pyaerocom/{ => stats}/mda8/__init__.py (100%) rename pyaerocom/{ => stats}/mda8/mda8.py (100%) rename tests/{ => stats}/mda8/__init__.py (100%) rename tests/{ => stats}/mda8/test_mda8.py (83%) diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 83e20fb0e..2c0239f81 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -31,7 +31,7 @@ from pyaerocom.io import ReadCAMS2_83, ReadGridded, ReadUngridded from pyaerocom.io.helpers import get_all_supported_ids_ungridded from pyaerocom.io.mscw_ctm.reader import ReadMscwCtm -from pyaerocom.mda8.mda8 import mda8_colocated_data +from pyaerocom.stats.mda8.mda8 import mda8_colocated_data from .colocated_data import ColocatedData from .colocation_3d import ColocatedDataLists, colocate_vertical_profile_gridded diff --git a/pyaerocom/mda8/__init__.py b/pyaerocom/stats/mda8/__init__.py similarity index 100% rename from pyaerocom/mda8/__init__.py rename to pyaerocom/stats/mda8/__init__.py diff --git a/pyaerocom/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py similarity index 100% rename from pyaerocom/mda8/mda8.py rename to pyaerocom/stats/mda8/mda8.py diff --git a/tests/mda8/__init__.py b/tests/stats/mda8/__init__.py similarity index 100% rename from tests/mda8/__init__.py rename to tests/stats/mda8/__init__.py diff --git a/tests/mda8/test_mda8.py b/tests/stats/mda8/test_mda8.py similarity index 83% rename from tests/mda8/test_mda8.py rename to tests/stats/mda8/test_mda8.py index 678e62f40..fd95ee120 100644 --- a/tests/mda8/test_mda8.py +++ b/tests/stats/mda8/test_mda8.py @@ -4,7 +4,12 @@ from pyaerocom.colocation.colocated_data import ColocatedData from pyaerocom.colocation.colocation_3d import ColocatedDataLists -from pyaerocom.mda8.mda8 import _calc_mda8, _daily_max, _rolling_average_8hr, mda8_colocated_data +from pyaerocom.stats.mda8.mda8 import ( + _calc_mda8, + _daily_max, + _rolling_average_8hr, + mda8_colocated_data, +) from tests.fixtures.collocated_data import coldata @@ -53,7 +58,7 @@ def test_calc_mda8(test_data, exp_mda8): assert mda8.shape[1] == len(exp_mda8) - np.testing.assert_array_equal(mda8[0, :, 0], exp_mda8) + assert all(mda8[0, :, 0] == pytest.approx(exp_mda8, abs=0, nan_ok=True)) def test_calc_mda8_with_gap(): @@ -86,8 +91,7 @@ def test_coldata_to_mda8(coldata): assert mda8.metadata["var_name"] == ["vmro3mda8", "vmro3mda8"] assert mda8.shape == (2, 9, 1) - np.testing.assert_array_almost_equal( - mda8.data.values[0, :, 0], + assert mda8.data.values[0, :, 0] == pytest.approx( [ np.nan, np.nan, @@ -99,9 +103,11 @@ def test_coldata_to_mda8(coldata): 1.18807846, 1.18700801, ], + abs=10**-5, + nan_ok=True, ) - np.testing.assert_array_almost_equal( - mda8.data.values[1, :, 0], + + assert mda8.data.values[1, :, 0] == pytest.approx( [ np.nan, 1.57327333, @@ -113,6 +119,8 @@ def test_coldata_to_mda8(coldata): 1.28807846, 1.28700801, ], + abs=10**-5, + nan_ok=True, ) @@ -142,9 +150,12 @@ def test_coldata_to_mda8(coldata): ), ) def test_rollingaverage(test_data, exp_ravg): + """ + Test of rolling average calculation + """ ravg = _rolling_average_8hr(test_data) - np.testing.assert_array_almost_equal(ravg[0, :, 0], exp_ravg) + assert all(ravg[0, :, 0] == pytest.approx(exp_ravg, abs=0, nan_ok=True)) @pytest.mark.parametrize( @@ -168,6 +179,12 @@ def test_rollingaverage(test_data, exp_ravg): ), ) def test_daily_max(test_data, exp_daily_max): + """ + Tests the daily max calculation. Note that "the first calculation period + for any one day will be the period from 17:00 on the previous day to 01:00 + on that day; the last calculation period for any one day will be the period + from 16:00 to 24:00 on that day + """ dmax = _daily_max(test_data) - np.testing.assert_array_equal(dmax[0, :, 0], exp_daily_max) + assert all(dmax[0, :, 0] == pytest.approx(exp_daily_max, abs=0, nan_ok=True)) From 09dd791f85fb610174b562c5afa75904511de12a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 09:43:47 +0000 Subject: [PATCH 19/22] h->H --- pyaerocom/stats/mda8/mda8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/stats/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py index e3f9aee36..869067539 100644 --- a/pyaerocom/stats/mda8/mda8.py +++ b/pyaerocom/stats/mda8/mda8.py @@ -82,6 +82,6 @@ def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: def _daily_max(arr: xr.DataArray) -> xr.DataArray: - return arr.resample(time="24h", offset="1h").reduce( + return arr.resample(time="24H", offset="1H").reduce( lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) ) From 0f1c682e11af9ecffb31fd73be7ef16bb58c04bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 09:58:24 +0000 Subject: [PATCH 20/22] Fix? --- pyaerocom/stats/mda8/mda8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/stats/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py index 869067539..8f37195a5 100644 --- a/pyaerocom/stats/mda8/mda8.py +++ b/pyaerocom/stats/mda8/mda8.py @@ -82,6 +82,6 @@ def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: def _daily_max(arr: xr.DataArray) -> xr.DataArray: - return arr.resample(time="24H", offset="1H").reduce( + return arr.resample(time="24H", base=1).reduce( lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) ) From 6596d7df8b8f04ec7e3fbfa7556c5321afa44b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 10:44:33 +0000 Subject: [PATCH 21/22] Comment --- pyaerocom/stats/mda8/mda8.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyaerocom/stats/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py index 8f37195a5..471708fae 100644 --- a/pyaerocom/stats/mda8/mda8.py +++ b/pyaerocom/stats/mda8/mda8.py @@ -82,6 +82,9 @@ def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: def _daily_max(arr: xr.DataArray) -> xr.DataArray: + # TODO: Base is deprecated, and using offset="1h" is the proper way to do this. + # However this currently breaks the old-dependencies test in CI. Should be + # changed in the future. return arr.resample(time="24H", base=1).reduce( lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) ) From ace42f6df8f8e675144ebc68a1a7c77ac98f106a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= Date: Tue, 2 Jul 2024 12:01:02 +0000 Subject: [PATCH 22/22] Store MDA8 variable names in constant --- pyaerocom/colocation/colocator.py | 3 ++- pyaerocom/stats/mda8/const.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 pyaerocom/stats/mda8/const.py diff --git a/pyaerocom/colocation/colocator.py b/pyaerocom/colocation/colocator.py index 2c0239f81..444a38a70 100644 --- a/pyaerocom/colocation/colocator.py +++ b/pyaerocom/colocation/colocator.py @@ -31,6 +31,7 @@ from pyaerocom.io import ReadCAMS2_83, ReadGridded, ReadUngridded from pyaerocom.io.helpers import get_all_supported_ids_ungridded from pyaerocom.io.mscw_ctm.reader import ReadMscwCtm +from pyaerocom.stats.mda8.const import MDA_VARS from pyaerocom.stats.mda8.mda8 import mda8_colocated_data from .colocated_data import ColocatedData @@ -382,7 +383,7 @@ def run(self, var_list: list = None): ) # note this can be ColocatedData or ColocatedDataLists data_out[mod_var][obs_var] = coldata - if obs_var in ["conco3", "vmro3"]: + if obs_var in MDA_VARS: mda8 = None try: mda8 = mda8_colocated_data( diff --git a/pyaerocom/stats/mda8/const.py b/pyaerocom/stats/mda8/const.py new file mode 100644 index 000000000..8c32ac409 --- /dev/null +++ b/pyaerocom/stats/mda8/const.py @@ -0,0 +1,2 @@ +# Variable names for which mda8 values are to be calculated. +MDA_VARS = ("conco3", "vmro3")