From abede51dca238e1d626db76692a50401a696a493 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sat, 16 Nov 2024 23:28:00 +0100 Subject: [PATCH 1/9] hack at a first attempt --- pyaerocom/aeroval/collections.py | 123 +++++++++++++++++++++++------ pyaerocom/aeroval/setup_classes.py | 23 ++++-- 2 files changed, 115 insertions(+), 31 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index dc67e3fc0..0ee0a05f8 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -1,5 +1,6 @@ import abc from fnmatch import fnmatch +import json from pyaerocom._lowlevel_helpers import BrowseDict from pyaerocom.aeroval.modelentry import ModelEntry @@ -166,42 +167,103 @@ def all_vert_types(self): return list({x.obs_vert_type for x in self.values()}) -class ModelCollection(BaseCollection): +# class ModelCollection(BaseCollection): +# """ +# Dict-like object that represents a collection of model entries + +# Keys are model names, values are instances of :class:`ModelEntry`. Values +# can also be assigned as dict and will automatically be converted into +# instances of :class:`ModelEntry`. + + +# Note +# ---- +# Entries must not necessarily be only models but may also be observations. +# Entries provided in this collection refer to the x-axis in the AeroVal +# heatmap display and must fulfill the protocol defined by +# :class:`ModelEntry`. + +# """ + +# SETTER_CONVERT = {dict: ModelEntry} + +# def get_entry(self, key) -> ModelEntry: +# """Get model entry configuration + +# Since the configuration files for experiments are in json format, they +# do not allow the storage of executable custom methods for model data +# reading. Instead, these can be specified in a python module that may +# be specified via :attr:`add_methods_file` and that contains a +# dictionary `FUNS` that maps the method names with the callable methods. + +# As a result, this means that, by default, custom read methods for +# individual models in :attr:`model_config` do not contain the +# callable methods but only the names. This method will take care of +# handling this and will return a dictionary where potential custom +# method strings have been converted to the corresponding callable +# methods. + +# Parameters +# ---------- +# model_name : str +# name of model + +# Returns +# ------- +# dict +# Dictionary that specifies the model setup ready for the analysis +# """ +# try: +# entry = self[key] +# entry.model_name = key +# return entry +# except (KeyError, AttributeError): +# raise EntryNotAvailable(f"no such entry {key}") + +# @property +# def web_interface_names(self) -> list: +# """ +# List of web interface names for each obs entry + +# Returns +# ------- +# list +# """ +# return self.keylist() + + +class ModelCollection: """ - Dict-like object that represents a collection of model entries + Object that represents a collection of model entries Keys are model names, values are instances of :class:`ModelEntry`. Values can also be assigned as dict and will automatically be converted into instances of :class:`ModelEntry`. - Note ---- Entries must not necessarily be only models but may also be observations. Entries provided in this collection refer to the x-axis in the AeroVal heatmap display and must fulfill the protocol defined by :class:`ModelEntry`. - """ - SETTER_CONVERT = {dict: ModelEntry} + def __init__(self): + self._entries = {} - def get_entry(self, key) -> ModelEntry: - """Get model entry configuration + def add_entry(self, key, entry: dict | ModelEntry): + if isinstance(entry, dict): + entry = ModelEntry(**entry) + entry.model_name = key + self._entries[key] = entry - Since the configuration files for experiments are in json format, they - do not allow the storage of executable custom methods for model data - reading. Instead, these can be specified in a python module that may - be specified via :attr:`add_methods_file` and that contains a - dictionary `FUNS` that maps the method names with the callable methods. - - As a result, this means that, by default, custom read methods for - individual models in :attr:`model_config` do not contain the - callable methods but only the names. This method will take care of - handling this and will return a dictionary where potential custom - method strings have been converted to the corresponding callable - methods. + def remove_entry(self, key): + if key in self._entries: + del self._entries[key] + def get_entry(self, key) -> ModelEntry: + """ + Get model entry configuration Parameters ---------- model_name : str @@ -212,13 +274,22 @@ def get_entry(self, key) -> ModelEntry: dict Dictionary that specifies the model setup ready for the analysis """ - try: - entry = self[key] - entry.model_name = key - return entry - except (KeyError, AttributeError): + if key in self._entries: + return self._entries[key] + else: raise EntryNotAvailable(f"no such entry {key}") + def keylist(self) -> list: + """ + Return list of keys in collection. + + Returns + ------- + list + list of keys in collection. + """ + return list(self._entries.keys()) + @property def web_interface_names(self) -> list: """ @@ -229,3 +300,7 @@ def web_interface_names(self) -> list: list """ return self.keylist() + + def to_json(self) -> str: + """Serialize ModelCollection to a JSON string.""" + return json.dumps({k: v.dict() for k, v in self._entries.items()}, default=str) diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 1677c1b3e..43c45cad6 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -513,17 +513,26 @@ def validate_obs_cfg(cls, v): def serialize_obs_cfg(self, obs_cfg: ObsCollection): return obs_cfg.json_repr() - model_cfg: ModelCollection | dict = ModelCollection() + # model_cfg: ModelCollection | dict = ModelCollection() - @field_validator("model_cfg") - def validate_model_cfg(cls, v): - if isinstance(v, ModelCollection): - return v - return ModelCollection(v) + # @field_validator("model_cfg") + # def validate_model_cfg(cls, v): + # if isinstance(v, ModelCollection): + # return v + # return ModelCollection(v) + + @computed_field + @cached_property + def model_cfg(self) -> ModelCollection: + mc = ModelCollection() + for k, v in self.model_extra.model_cfg.items(): + mc.add_entry(k, v) + return mc @field_serializer("model_cfg") def serialize_model_cfg(self, model_cfg: ModelCollection): - return model_cfg.json_repr() + # return model_cfg.json_repr() + return model_cfg.to_json() ########################### ## Methods From a7faf425c4e18f7eb7b7e14c1a6fe211c02c41f1 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 17:17:05 +0100 Subject: [PATCH 2/9] ModelCollection passing Aeroval tests --- pyaerocom/aeroval/collections.py | 52 ++++++++++++++++++---- pyaerocom/aeroval/helpers.py | 2 +- pyaerocom/aeroval/setup_classes.py | 2 +- pyaerocom/aeroval/utils.py | 3 +- tests/aeroval/test_aeroval_HIGHLEV.py | 4 +- tests/aeroval/test_collections.py | 16 ++++--- tests/aeroval/test_experiment_output.py | 10 ++--- tests/aeroval/test_experiment_processor.py | 5 ++- 8 files changed, 69 insertions(+), 25 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 0ee0a05f8..6a1bb63a1 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -251,17 +251,23 @@ class ModelCollection: def __init__(self): self._entries = {} - def add_entry(self, key, entry: dict | ModelEntry): + def __iter__(self): + """ + Yield each ModelEntry in the collection. + """ + yield from self._entries.values() + + def add_entry(self, key: str, entry: dict | ModelEntry): if isinstance(entry, dict): entry = ModelEntry(**entry) entry.model_name = key self._entries[key] = entry - def remove_entry(self, key): + def remove_entry(self, key: str): if key in self._entries: del self._entries[key] - def get_entry(self, key) -> ModelEntry: + def get_entry(self, key: str) -> ModelEntry: """ Get model entry configuration Parameters @@ -279,16 +285,46 @@ def get_entry(self, key) -> ModelEntry: else: raise EntryNotAvailable(f"no such entry {key}") - def keylist(self) -> list: - """ - Return list of keys in collection. + # def keylist(self) -> list: + # """ + # Return list of keys in collection. + + # Returns + # ------- + # list + # list of keys in collection. + # """ + # return list(self._entries.keys()) + + def keylist(self, name_or_pattern: str = None) -> list: + """Find model names that match input search pattern(s) + + Parameters + ---------- + name_or_pattern : str, optional + Name or pattern specifying search string. Returns ------- list - list of keys in collection. + list of keys in collection that match input requirements. If + `name_or_pattern` is None, all keys will be returned. + + Raises + ------ + KeyError + if no matches can be found """ - return list(self._entries.keys()) + if name_or_pattern is None: + name_or_pattern = "*" + + matches = [] + for key in self._entries.keys(): + if fnmatch(key, name_or_pattern) and key not in matches: + matches.append(key) + if len(matches) == 0: + raise KeyError(f"No matches could be found that match input {name_or_pattern}") + return matches @property def web_interface_names(self) -> list: diff --git a/pyaerocom/aeroval/helpers.py b/pyaerocom/aeroval/helpers.py index 0a692161f..7cc621be0 100644 --- a/pyaerocom/aeroval/helpers.py +++ b/pyaerocom/aeroval/helpers.py @@ -191,7 +191,7 @@ def make_dummy_model(obs_list: list, cfg) -> str: dummy_grid_yr.to_netcdf(outdir, savename=save_name) # Add dummy model to cfg - cfg.model_cfg["dummy"] = ModelEntry(model_id="dummy_model") + cfg.model_cfg.add_entry("dummy", ModelEntry(model_id="dummy_model")) return model_id diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 43c45cad6..5a884eef4 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -525,7 +525,7 @@ def serialize_obs_cfg(self, obs_cfg: ObsCollection): @cached_property def model_cfg(self) -> ModelCollection: mc = ModelCollection() - for k, v in self.model_extra.model_cfg.items(): + for k, v in self.model_extra.get("model_cfg", {}).items(): mc.add_entry(k, v) return mc diff --git a/pyaerocom/aeroval/utils.py b/pyaerocom/aeroval/utils.py index 8f680fe05..a4163ca3d 100644 --- a/pyaerocom/aeroval/utils.py +++ b/pyaerocom/aeroval/utils.py @@ -145,7 +145,8 @@ def compute_model_average_and_diversity( unit_out = get_variable(var_name).units - for mname in models: + for m in models: + mname = m.model_name logger.info(f"Adding {mname} ({var_name})") mid = cfg.cfg.model_cfg.get_entry(mname).model_id diff --git a/tests/aeroval/test_aeroval_HIGHLEV.py b/tests/aeroval/test_aeroval_HIGHLEV.py index 558983fa7..34caab906 100644 --- a/tests/aeroval/test_aeroval_HIGHLEV.py +++ b/tests/aeroval/test_aeroval_HIGHLEV.py @@ -113,8 +113,8 @@ def test_reanalyse_existing(eval_config: dict, reanalyse_existing: bool): @pytest.mark.parametrize("cfg", ["cfgexp4"]) def test_superobs_different_resolutions(eval_config: dict): cfg = EvalSetup(**eval_config) - cfg.model_cfg["TM5-AP3-CTRL"].model_ts_type_read = None - cfg.model_cfg["TM5-AP3-CTRL"].flex_ts_type = True + cfg.model_cfg.get_entry("TM5-AP3-CTRL").model_ts_type_read = None + cfg.model_cfg.get_entry("TM5-AP3-CTRL").flex_ts_type = True cfg.obs_cfg["AERONET-Sun"].ts_type = "daily" cfg.obs_cfg["AERONET-SDA"].ts_type = "monthly" diff --git a/tests/aeroval/test_collections.py b/tests/aeroval/test_collections.py index 5f2a98f7c..c00e1016c 100644 --- a/tests/aeroval/test_collections.py +++ b/tests/aeroval/test_collections.py @@ -16,13 +16,17 @@ def test_obscollection(): def test_modelcollection(): - mc = ModelCollection(model1=dict(model_id="bla", obs_vars="od550aer", obs_vert_type="Column")) + mc = ModelCollection() + mc.add_entry("model1", dict(model_id="bla", obs_vars="od550aer", obs_vert_type="Column")) assert mc - mc["ECMWF_OSUITE"] = dict( - model_id="ECMWF_OSUITE", - obs_vars=["concpm10"], - obs_vert_type="Surface", + mc.add_entry( + "ECMWF_OSUITE", + dict( + model_id="ECMWF_OSUITE", + obs_vars=["concpm10"], + obs_vert_type="Surface", + ), ) - assert "ECMWF_OSUITE" in mc + assert "ECMWF_OSUITE" in mc.keylist() diff --git a/tests/aeroval/test_experiment_output.py b/tests/aeroval/test_experiment_output.py index a74498eb1..868937929 100644 --- a/tests/aeroval/test_experiment_output.py +++ b/tests/aeroval/test_experiment_output.py @@ -293,10 +293,10 @@ def test_Experiment_Output_clean_json_files_CFG1(eval_config: dict): @pytest.mark.parametrize("cfg", ["cfgexp1"]) def test_Experiment_Output_clean_json_files_CFG1_INVALIDMOD(eval_config: dict): cfg = EvalSetup(**eval_config) - cfg.model_cfg["mod1"] = cfg.model_cfg["TM5-AP3-CTRL"] + cfg.model_cfg.add_entry("mod1", cfg.model_cfg.get_entry("TM5-AP3-CTRL")) proc = ExperimentProcessor(cfg) proc.run() - del cfg.model_cfg["mod1"] + cfg.model_cfg.remove_entry("mod1") modified = proc.exp_output.clean_json_files() assert len(modified) == 13 @@ -305,10 +305,10 @@ def test_Experiment_Output_clean_json_files_CFG1_INVALIDMOD(eval_config: dict): @pytest.mark.parametrize("cfg", ["cfgexp1"]) def test_Experiment_Output_clean_json_files_CFG1_INVALIDOBS(eval_config: dict): cfg = EvalSetup(**eval_config) - cfg.obs_cfg["obs1"] = cfg.obs_cfg["AERONET-Sun"] + cfg.obs_cfg.add_entry("obs1", cfg.obs_cfg.get_entry("AERONET-Sun")) proc = ExperimentProcessor(cfg) proc.run() - del cfg.obs_cfg["obs1"] + cfg.obs_cfg.remove_entry("obs1") modified = proc.exp_output.clean_json_files() assert len(modified) == 13 @@ -354,7 +354,7 @@ def test_Experiment_Output_drop_stats_and_decimals( stats_decimals, ) cfg = EvalSetup(**eval_config) - cfg.model_cfg["mod1"] = cfg.model_cfg["TM5-AP3-CTRL"] + cfg.model_cfg.add_entry("mod1", cfg.model_cfg.get_entry("TM5-AP3-CTRL")) proc = ExperimentProcessor(cfg) proc.run() path = Path(proc.exp_output.exp_dir) diff --git a/tests/aeroval/test_experiment_processor.py b/tests/aeroval/test_experiment_processor.py index 5374e831a..af0be63c0 100644 --- a/tests/aeroval/test_experiment_processor.py +++ b/tests/aeroval/test_experiment_processor.py @@ -31,6 +31,7 @@ def test_ExperimentProcessor_run(processor: ExperimentProcessor): processor.run() +# Temporary until ObsCollection implemented simiarly then can run same test @geojson_unavail @pytest.mark.parametrize( "cfg,kwargs,error", @@ -47,7 +48,9 @@ def test_ExperimentProcessor_run(processor: ExperimentProcessor): ), ], ) -def test_ExperimentProcessor_run_error(processor: ExperimentProcessor, kwargs: dict, error: str): +def test_ExperimentProcessor_run_error_obs_name( + processor: ExperimentProcessor, kwargs: dict, error: str +): with pytest.raises(KeyError) as e: processor.run(**kwargs) assert str(e.value) == error From 91ee520d2afd1a2644f543d22e348c06f7d9905a Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 17:42:22 +0100 Subject: [PATCH 3/9] implement Collection ABC --- pyaerocom/aeroval/collections.py | 178 ++++++++---------------- tests/aeroval/test_experiment_output.py | 2 +- 2 files changed, 59 insertions(+), 121 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 6a1bb63a1..378133ec9 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -167,137 +167,30 @@ def all_vert_types(self): return list({x.obs_vert_type for x in self.values()}) -# class ModelCollection(BaseCollection): -# """ -# Dict-like object that represents a collection of model entries - -# Keys are model names, values are instances of :class:`ModelEntry`. Values -# can also be assigned as dict and will automatically be converted into -# instances of :class:`ModelEntry`. - - -# Note -# ---- -# Entries must not necessarily be only models but may also be observations. -# Entries provided in this collection refer to the x-axis in the AeroVal -# heatmap display and must fulfill the protocol defined by -# :class:`ModelEntry`. - -# """ - -# SETTER_CONVERT = {dict: ModelEntry} - -# def get_entry(self, key) -> ModelEntry: -# """Get model entry configuration - -# Since the configuration files for experiments are in json format, they -# do not allow the storage of executable custom methods for model data -# reading. Instead, these can be specified in a python module that may -# be specified via :attr:`add_methods_file` and that contains a -# dictionary `FUNS` that maps the method names with the callable methods. - -# As a result, this means that, by default, custom read methods for -# individual models in :attr:`model_config` do not contain the -# callable methods but only the names. This method will take care of -# handling this and will return a dictionary where potential custom -# method strings have been converted to the corresponding callable -# methods. - -# Parameters -# ---------- -# model_name : str -# name of model - -# Returns -# ------- -# dict -# Dictionary that specifies the model setup ready for the analysis -# """ -# try: -# entry = self[key] -# entry.model_name = key -# return entry -# except (KeyError, AttributeError): -# raise EntryNotAvailable(f"no such entry {key}") - -# @property -# def web_interface_names(self) -> list: -# """ -# List of web interface names for each obs entry - -# Returns -# ------- -# list -# """ -# return self.keylist() - - -class ModelCollection: - """ - Object that represents a collection of model entries - - Keys are model names, values are instances of :class:`ModelEntry`. Values - can also be assigned as dict and will automatically be converted into - instances of :class:`ModelEntry`. - - Note - ---- - Entries must not necessarily be only models but may also be observations. - Entries provided in this collection refer to the x-axis in the AeroVal - heatmap display and must fulfill the protocol defined by - :class:`ModelEntry`. - """ - +class Collection(abc.ABC): def __init__(self): self._entries = {} def __iter__(self): """ - Yield each ModelEntry in the collection. + Yield each entry in the collection. """ yield from self._entries.values() - def add_entry(self, key: str, entry: dict | ModelEntry): - if isinstance(entry, dict): - entry = ModelEntry(**entry) - entry.model_name = key - self._entries[key] = entry - - def remove_entry(self, key: str): - if key in self._entries: - del self._entries[key] - - def get_entry(self, key: str) -> ModelEntry: - """ - Get model entry configuration - Parameters - ---------- - model_name : str - name of model - - Returns - ------- - dict - Dictionary that specifies the model setup ready for the analysis - """ - if key in self._entries: - return self._entries[key] - else: - raise EntryNotAvailable(f"no such entry {key}") + @abc.abstractmethod + def add_entry(self, key, value) -> None: + pass - # def keylist(self) -> list: - # """ - # Return list of keys in collection. + @abc.abstractmethod + def remove_entry(self, key) -> None: + pass - # Returns - # ------- - # list - # list of keys in collection. - # """ - # return list(self._entries.keys()) + @abc.abstractmethod + def get_entry(self, key) -> object: + pass - def keylist(self, name_or_pattern: str = None) -> list: - """Find model names that match input search pattern(s) + def keylist(self, name_or_pattern: str = None) -> list[str]: + """Find model / obs names that match input search pattern(s) Parameters ---------- @@ -340,3 +233,48 @@ def web_interface_names(self) -> list: def to_json(self) -> str: """Serialize ModelCollection to a JSON string.""" return json.dumps({k: v.dict() for k, v in self._entries.items()}, default=str) + + +class ModelCollection(Collection): + """ + Object that represents a collection of model entries + + Keys are model names, values are instances of :class:`ModelEntry`. Values + can also be assigned as dict and will automatically be converted into + instances of :class:`ModelEntry`. + + Note + ---- + Entries must not necessarily be only models but may also be observations. + Entries provided in this collection refer to the x-axis in the AeroVal + heatmap display and must fulfill the protocol defined by + :class:`ModelEntry`. + """ + + def add_entry(self, key: str, entry: dict | ModelEntry): + if isinstance(entry, dict): + entry = ModelEntry(**entry) + entry.model_name = key + self._entries[key] = entry + + def remove_entry(self, key: str): + if key in self._entries: + del self._entries[key] + + def get_entry(self, key: str) -> ModelEntry: + """ + Get model entry configuration + Parameters + ---------- + model_name : str + name of model + + Returns + ------- + dict + Dictionary that specifies the model setup ready for the analysis + """ + if key in self._entries: + return self._entries[key] + else: + raise EntryNotAvailable(f"no such entry {key}") diff --git a/tests/aeroval/test_experiment_output.py b/tests/aeroval/test_experiment_output.py index 868937929..49f5b065c 100644 --- a/tests/aeroval/test_experiment_output.py +++ b/tests/aeroval/test_experiment_output.py @@ -305,7 +305,7 @@ def test_Experiment_Output_clean_json_files_CFG1_INVALIDMOD(eval_config: dict): @pytest.mark.parametrize("cfg", ["cfgexp1"]) def test_Experiment_Output_clean_json_files_CFG1_INVALIDOBS(eval_config: dict): cfg = EvalSetup(**eval_config) - cfg.obs_cfg.add_entry("obs1", cfg.obs_cfg.get_entry("AERONET-Sun")) + cfg.obs_cfg["obs1"] = cfg.obs_cfg["AERONET-Sun"] proc = ExperimentProcessor(cfg) proc.run() cfg.obs_cfg.remove_entry("obs1") From f20ec94ffcc8eaa39ff3c3f7abccb701a4ed1223 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 19:11:22 +0100 Subject: [PATCH 4/9] 1 CAMS2_83 test failing. getter and setter for get_web_interface_name? --- pyaerocom/aeroval/collections.py | 162 +++++++++++++----------- pyaerocom/aeroval/experiment_output.py | 4 +- pyaerocom/aeroval/helpers.py | 4 +- pyaerocom/aeroval/setup_classes.py | 22 +++- pyaerocom/aeroval/superobs_engine.py | 5 +- tests/aeroval/test_aeroval_HIGHLEV.py | 4 +- tests/aeroval/test_collections.py | 18 ++- tests/aeroval/test_experiment_output.py | 3 +- tests/aeroval/test_helpers.py | 2 +- 9 files changed, 124 insertions(+), 100 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 378133ec9..4bff49f8c 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -77,7 +77,75 @@ def web_interface_names(self) -> list: pass -class ObsCollection(BaseCollection): +class Collection(abc.ABC): + def __init__(self): + self._entries = {} + + def __iter__(self): + """ + Yield each entry in the collection. + """ + yield from self._entries.values() + + @abc.abstractmethod + def add_entry(self, key, value) -> None: + pass + + @abc.abstractmethod + def remove_entry(self, key) -> None: + pass + + @abc.abstractmethod + def get_entry(self, key) -> object: + pass + + def keylist(self, name_or_pattern: str = None) -> list[str]: + """Find model / obs names that match input search pattern(s) + + Parameters + ---------- + name_or_pattern : str, optional + Name or pattern specifying search string. + + Returns + ------- + list + list of keys in collection that match input requirements. If + `name_or_pattern` is None, all keys will be returned. + + Raises + ------ + KeyError + if no matches can be found + """ + if name_or_pattern is None: + name_or_pattern = "*" + + matches = [] + for key in self._entries.keys(): + if fnmatch(key, name_or_pattern) and key not in matches: + matches.append(key) + if len(matches) == 0: + raise KeyError(f"No matches could be found that match input {name_or_pattern}") + return matches + + @property + def web_interface_names(self) -> list: + """ + List of web interface names for each obs entry + + Returns + ------- + list + """ + return self.keylist() + + def to_json(self) -> str: + """Serialize ModelCollection to a JSON string.""" + return json.dumps({k: v.dict() for k, v in self._entries.items()}, default=str) + + +class ObsCollection(Collection): """ Dict-like object that represents a collection of obs entries @@ -94,9 +162,16 @@ class ObsCollection(BaseCollection): """ - SETTER_CONVERT = {dict: ObsEntry} + def add_entry(self, key: str, entry: dict | ObsEntry): + if isinstance(entry, dict): + entry = ObsEntry(**entry) + self._entries[key] = entry - def get_entry(self, key) -> object: + def remove_entry(self, key: str): + if key in self._entries: + del self._entries[key] + + def get_entry(self, key) -> ObsEntry: """ Getter for obs entries @@ -106,7 +181,7 @@ def get_entry(self, key) -> object: if input name is not in this collection """ try: - entry = self[key] + entry = self._entries[key] entry.obs_name = self.get_web_interface_name(key) return entry except (KeyError, AttributeError): @@ -123,7 +198,7 @@ def get_all_vars(self) -> list[str]: """ vars = [] - for ocfg in self.values(): + for ocfg in self._entries.values(): vars.extend(ocfg.get_all_vars()) return sorted(list(set(vars))) @@ -148,7 +223,12 @@ def get_web_interface_name(self, key): corresponding name """ - return self[key].web_interface_name if self[key].web_interface_name is not None else key + # LB: private method? + return ( + self._entries[key].web_interface_name + if self._entries[key].web_interface_name is not None + else key + ) @property def web_interface_names(self) -> list: @@ -164,75 +244,7 @@ def web_interface_names(self) -> list: @property def all_vert_types(self): """List of unique vertical types specified in this collection""" - return list({x.obs_vert_type for x in self.values()}) - - -class Collection(abc.ABC): - def __init__(self): - self._entries = {} - - def __iter__(self): - """ - Yield each entry in the collection. - """ - yield from self._entries.values() - - @abc.abstractmethod - def add_entry(self, key, value) -> None: - pass - - @abc.abstractmethod - def remove_entry(self, key) -> None: - pass - - @abc.abstractmethod - def get_entry(self, key) -> object: - pass - - def keylist(self, name_or_pattern: str = None) -> list[str]: - """Find model / obs names that match input search pattern(s) - - Parameters - ---------- - name_or_pattern : str, optional - Name or pattern specifying search string. - - Returns - ------- - list - list of keys in collection that match input requirements. If - `name_or_pattern` is None, all keys will be returned. - - Raises - ------ - KeyError - if no matches can be found - """ - if name_or_pattern is None: - name_or_pattern = "*" - - matches = [] - for key in self._entries.keys(): - if fnmatch(key, name_or_pattern) and key not in matches: - matches.append(key) - if len(matches) == 0: - raise KeyError(f"No matches could be found that match input {name_or_pattern}") - return matches - - @property - def web_interface_names(self) -> list: - """ - List of web interface names for each obs entry - - Returns - ------- - list - """ - return self.keylist() - - def to_json(self) -> str: - """Serialize ModelCollection to a JSON string.""" - return json.dumps({k: v.dict() for k, v in self._entries.items()}, default=str) + return list({x.obs_vert_type for x in self._entries.values()}) class ModelCollection(Collection): diff --git a/pyaerocom/aeroval/experiment_output.py b/pyaerocom/aeroval/experiment_output.py index 9e6ce980e..a15d524c2 100644 --- a/pyaerocom/aeroval/experiment_output.py +++ b/pyaerocom/aeroval/experiment_output.py @@ -751,8 +751,8 @@ def _is_part_of_experiment(self, obs_name, obs_var, mod_name, mod_var) -> bool: # occurence of web_interface_name). allobs = self.cfg.obs_cfg obs_matches = [] - for key, ocfg in allobs.items(): - if obs_name == allobs.get_web_interface_name(key): + for ocfg in allobs: + if obs_name == allobs.get_web_interface_name(ocfg.obs_name): obs_matches.append(ocfg) if len(obs_matches) == 0: self._invalid["obs"].append(obs_name) diff --git a/pyaerocom/aeroval/helpers.py b/pyaerocom/aeroval/helpers.py index 7cc621be0..b4a8fa43f 100644 --- a/pyaerocom/aeroval/helpers.py +++ b/pyaerocom/aeroval/helpers.py @@ -162,7 +162,7 @@ def make_dummy_model(obs_list: list, cfg) -> str: tmp_var_obj = Variable() # Loops over variables in obs for obs in obs_list: - for var in cfg.obs_cfg[obs].obs_vars: + for var in cfg.obs_cfg.get_entry(obs).obs_vars: # Create dummy cube dummy_cube = make_dummy_cube(var, start_yr=start, stop_yr=stop, freq=freq) @@ -185,7 +185,7 @@ def make_dummy_model(obs_list: list, cfg) -> str: for dummy_grid_yr in yr_gen: # Add to netcdf yr = dummy_grid_yr.years_avail()[0] - vert_code = cfg.obs_cfg[obs].obs_vert_type + vert_code = cfg.obs_cfg.get_entry(obs).obs_vert_type save_name = dummy_grid_yr.aerocom_savename(model_id, var, vert_code, yr, freq) dummy_grid_yr.to_netcdf(outdir, savename=save_name) diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 5a884eef4..97d1cdf8a 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -501,17 +501,25 @@ def colocation_opts(self) -> ColocationSetup: # These attributes require special attention b/c they're not based on Pydantic's BaseModel class. - obs_cfg: ObsCollection | dict = ObsCollection() + # obs_cfg: ObsCollection | dict = ObsCollection() - @field_validator("obs_cfg") - def validate_obs_cfg(cls, v): - if isinstance(v, ObsCollection): - return v - return ObsCollection(v) + # @field_validator("obs_cfg") + # def validate_obs_cfg(cls, v): + # if isinstance(v, ObsCollection): + # return v + # return ObsCollection(v) + + @computed_field + @cached_property + def obs_cfg(self) -> ObsCollection: + oc = ObsCollection() + for k, v in self.model_extra.get("obs_cfg", {}).items(): + oc.add_entry(k, v) + return oc @field_serializer("obs_cfg") def serialize_obs_cfg(self, obs_cfg: ObsCollection): - return obs_cfg.json_repr() + return obs_cfg.to_json() # model_cfg: ModelCollection | dict = ModelCollection() diff --git a/pyaerocom/aeroval/superobs_engine.py b/pyaerocom/aeroval/superobs_engine.py index fdae8d8b7..46364caaa 100644 --- a/pyaerocom/aeroval/superobs_engine.py +++ b/pyaerocom/aeroval/superobs_engine.py @@ -75,8 +75,9 @@ def _run_var(self, model_name, obs_name, var_name, try_colocate_if_missing): coldata_files = [] coldata_resolutions = [] vert_codes = [] - obs_needed = self.cfg.obs_cfg[obs_name].obs_id - vert_code = self.cfg.obs_cfg.get_entry(obs_name).obs_vert_type + obs_entry = self.cfg.obs_cfg.get_entry(obs_name) + obs_needed = obs_entry.obs_id + vert_code = obs_entry.obs_vert_type for oname in obs_needed: fp, ts_type, vert_code = self._get_coldata_fileinfo( model_name, oname, var_name, try_colocate_if_missing diff --git a/tests/aeroval/test_aeroval_HIGHLEV.py b/tests/aeroval/test_aeroval_HIGHLEV.py index 34caab906..c2b07ed99 100644 --- a/tests/aeroval/test_aeroval_HIGHLEV.py +++ b/tests/aeroval/test_aeroval_HIGHLEV.py @@ -116,8 +116,8 @@ def test_superobs_different_resolutions(eval_config: dict): cfg.model_cfg.get_entry("TM5-AP3-CTRL").model_ts_type_read = None cfg.model_cfg.get_entry("TM5-AP3-CTRL").flex_ts_type = True - cfg.obs_cfg["AERONET-Sun"].ts_type = "daily" - cfg.obs_cfg["AERONET-SDA"].ts_type = "monthly" + cfg.obs_cfg.get_entry("AERONET-Sun").ts_type = "daily" + cfg.obs_cfg.get_entry("AERONET-SDA").ts_type = "monthly" proc = ExperimentProcessor(cfg) proc.exp_output.delete_experiment_data(also_coldata=True) diff --git a/tests/aeroval/test_collections.py b/tests/aeroval/test_collections.py index c00e1016c..b614ceabc 100644 --- a/tests/aeroval/test_collections.py +++ b/tests/aeroval/test_collections.py @@ -2,17 +2,21 @@ def test_obscollection(): - oc = ObsCollection(model1=dict(obs_id="bla", obs_vars="od550aer", obs_vert_type="Column")) + oc = ObsCollection() + oc.add_entry("model1", dict(obs_id="bla", obs_vars="od550aer", obs_vert_type="Column")) assert oc - oc["AN-EEA-MP"] = dict( - is_superobs=True, - obs_id=("AirNow", "EEA-NRT-rural", "MarcoPolo"), - obs_vars=["concpm10", "concpm25", "vmro3", "vmrno2"], - obs_vert_type="Surface", + oc.add_entry( + "AN-EEA-MP", + dict( + is_superobs=True, + obs_id=("AirNow", "EEA-NRT-rural", "MarcoPolo"), + obs_vars=["concpm10", "concpm25", "vmro3", "vmrno2"], + obs_vert_type="Surface", + ), ) - assert "AN-EEA-MP" in oc + assert "AN-EEA-MP" in oc.keylist() def test_modelcollection(): diff --git a/tests/aeroval/test_experiment_output.py b/tests/aeroval/test_experiment_output.py index 49f5b065c..522516948 100644 --- a/tests/aeroval/test_experiment_output.py +++ b/tests/aeroval/test_experiment_output.py @@ -305,10 +305,9 @@ def test_Experiment_Output_clean_json_files_CFG1_INVALIDMOD(eval_config: dict): @pytest.mark.parametrize("cfg", ["cfgexp1"]) def test_Experiment_Output_clean_json_files_CFG1_INVALIDOBS(eval_config: dict): cfg = EvalSetup(**eval_config) - cfg.obs_cfg["obs1"] = cfg.obs_cfg["AERONET-Sun"] + cfg.obs_cfg.add_entry("obs1", cfg.obs_cfg.get_entry("AERONET-Sun")) proc = ExperimentProcessor(cfg) proc.run() - cfg.obs_cfg.remove_entry("obs1") modified = proc.exp_output.clean_json_files() assert len(modified) == 13 diff --git a/tests/aeroval/test_helpers.py b/tests/aeroval/test_helpers.py index 085efe943..c2af1998a 100644 --- a/tests/aeroval/test_helpers.py +++ b/tests/aeroval/test_helpers.py @@ -137,6 +137,6 @@ def test__get_min_max_year_periods_error(): @pytest.mark.parametrize("cfg", ["cfgexp1"]) def test_make_dummy_model(eval_config: dict): cfg = EvalSetup(**eval_config) - assert cfg.obs_cfg["AERONET-Sun"] + assert cfg.obs_cfg.get_entry("AERONET-Sun") model_id = make_dummy_model(["AERONET-Sun"], cfg) assert model_id == "dummy_model" From 3105b77bb40ff2bb18d409ee97d89e2e2bc1ea2d Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 21:17:30 +0100 Subject: [PATCH 5/9] rm BrowseDict-based BaseCollection --- pyaerocom/aeroval/collections.py | 86 +++----------------------------- 1 file changed, 8 insertions(+), 78 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 4bff49f8c..5694b58a8 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -2,82 +2,12 @@ from fnmatch import fnmatch import json -from pyaerocom._lowlevel_helpers import BrowseDict from pyaerocom.aeroval.modelentry import ModelEntry from pyaerocom.aeroval.obsentry import ObsEntry -from pyaerocom.exceptions import EntryNotAvailable, EvalEntryNameError +from pyaerocom.exceptions import EntryNotAvailable -class BaseCollection(BrowseDict, abc.ABC): - #: maximum length of entry names - MAXLEN_KEYS = 25 - #: Invalid chars in entry names - FORBIDDEN_CHARS_KEYS = [] - - # TODO: Wait a few release cycles after v0.23.0 and see if this can be removed - def _check_entry_name(self, key): - if any([x in key for x in self.FORBIDDEN_CHARS_KEYS]): - raise EvalEntryNameError( - f"Invalid name: {key}. Must not contain any of the following " - f"characters: {self.FORBIDDEN_CHARS_KEYS}" - ) - - def __setitem__(self, key, value): - self._check_entry_name(key) - super().__setitem__(key, value) - - def keylist(self, name_or_pattern: str = None) -> list: - """Find model names that match input search pattern(s) - - Parameters - ---------- - name_or_pattern : str, optional - Name or pattern specifying search string. - - Returns - ------- - list - list of keys in collection that match input requirements. If - `name_or_pattern` is None, all keys will be returned. - - Raises - ------ - KeyError - if no matches can be found - """ - if name_or_pattern is None: - name_or_pattern = "*" - - matches = [] - for key in self.keys(): - if fnmatch(key, name_or_pattern) and key not in matches: - matches.append(key) - if len(matches) == 0: - raise KeyError(f"No matches could be found that match input {name_or_pattern}") - return matches - - @abc.abstractmethod - def get_entry(self, key) -> object: - """ - Getter for eval entries - - Raises - ------ - KeyError - if input name is not in this collection - """ - pass - - @property - @abc.abstractmethod - def web_interface_names(self) -> list: - """ - List of webinterface names for - """ - pass - - -class Collection(abc.ABC): +class BaseCollection(abc.ABC): def __init__(self): self._entries = {} @@ -145,9 +75,9 @@ def to_json(self) -> str: return json.dumps({k: v.dict() for k, v in self._entries.items()}, default=str) -class ObsCollection(Collection): +class ObsCollection(BaseCollection): """ - Dict-like object that represents a collection of obs entries + Object that represents a collection of obs entries Keys are obs names, values are instances of :class:`ObsEntry`. Values can also be assigned as dict and will automatically be converted into @@ -223,10 +153,10 @@ def get_web_interface_name(self, key): corresponding name """ - # LB: private method? + entry = self._entries.get(key) return ( - self._entries[key].web_interface_name - if self._entries[key].web_interface_name is not None + entry.web_interface_name + if entry is not None and entry.web_interface_name is not None else key ) @@ -247,7 +177,7 @@ def all_vert_types(self): return list({x.obs_vert_type for x in self._entries.values()}) -class ModelCollection(Collection): +class ModelCollection(BaseCollection): """ Object that represents a collection of model entries From c3e6a72edd90340172f66329662d858f725d6bd8 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 21:19:21 +0100 Subject: [PATCH 6/9] rm dead code --- pyaerocom/aeroval/setup_classes.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 97d1cdf8a..a75b36a8e 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -501,14 +501,6 @@ def colocation_opts(self) -> ColocationSetup: # These attributes require special attention b/c they're not based on Pydantic's BaseModel class. - # obs_cfg: ObsCollection | dict = ObsCollection() - - # @field_validator("obs_cfg") - # def validate_obs_cfg(cls, v): - # if isinstance(v, ObsCollection): - # return v - # return ObsCollection(v) - @computed_field @cached_property def obs_cfg(self) -> ObsCollection: @@ -521,14 +513,6 @@ def obs_cfg(self) -> ObsCollection: def serialize_obs_cfg(self, obs_cfg: ObsCollection): return obs_cfg.to_json() - # model_cfg: ModelCollection | dict = ModelCollection() - - # @field_validator("model_cfg") - # def validate_model_cfg(cls, v): - # if isinstance(v, ModelCollection): - # return v - # return ModelCollection(v) - @computed_field @cached_property def model_cfg(self) -> ModelCollection: @@ -539,7 +523,6 @@ def model_cfg(self) -> ModelCollection: @field_serializer("model_cfg") def serialize_model_cfg(self, model_cfg: ModelCollection): - # return model_cfg.json_repr() return model_cfg.to_json() ########################### From 72a9b4e51745d5f858dd4e505a6f39b233321339 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 17 Nov 2024 21:35:18 +0100 Subject: [PATCH 7/9] minor type hinting --- pyaerocom/aeroval/collections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 5694b58a8..487e07c04 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -101,7 +101,7 @@ def remove_entry(self, key: str): if key in self._entries: del self._entries[key] - def get_entry(self, key) -> ObsEntry: + def get_entry(self, key: str) -> ObsEntry: """ Getter for obs entries @@ -132,7 +132,7 @@ def get_all_vars(self) -> list[str]: vars.extend(ocfg.get_all_vars()) return sorted(list(set(vars))) - def get_web_interface_name(self, key): + def get_web_interface_name(self, key: str) -> str: """ Get webinterface name for entry From 14bd371f8cbbc8e1d5c4ae8f396e0b668c8939f2 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 18 Nov 2024 08:23:48 +0100 Subject: [PATCH 8/9] add doc strings --- pyaerocom/aeroval/collections.py | 46 +++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 487e07c04..ff16cabfc 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -9,24 +9,64 @@ class BaseCollection(abc.ABC): def __init__(self): + """ + Initialize an instance of BaseCollection. + The instance maintains a dictionary of entries. + """ self._entries = {} def __iter__(self): """ - Yield each entry in the collection. + Iterates over each entry in the collection. + + Yields + ------ + object + The next entry in the collection. """ yield from self._entries.values() @abc.abstractmethod def add_entry(self, key, value) -> None: + """ + Abstract method to add an entry to the collection. + + Parameters + ---------- + key: Hashable + The key of the entry. + value: object + The value of the entry. + """ pass @abc.abstractmethod def remove_entry(self, key) -> None: + """ + Abstract method to remove an entry from the collection. + + Parameters + ---------- + key: Hashable + The key of the entry to be removed. + """ pass @abc.abstractmethod def get_entry(self, key) -> object: + """ + Abstract method to get an entry from the collection. + + Parameters + ---------- + key: Hashable + The key of the entry to retrieve. + + Returns + ------- + object + The entry associated with the provided key. + """ pass def keylist(self, name_or_pattern: str = None) -> list[str]: @@ -79,7 +119,7 @@ class ObsCollection(BaseCollection): """ Object that represents a collection of obs entries - Keys are obs names, values are instances of :class:`ObsEntry`. Values can + "Keys" are obs names, values are instances of :class:`ObsEntry`. Values can also be assigned as dict and will automatically be converted into instances of :class:`ObsEntry`. @@ -181,7 +221,7 @@ class ModelCollection(BaseCollection): """ Object that represents a collection of model entries - Keys are model names, values are instances of :class:`ModelEntry`. Values + "Keys" are model names, values are instances of :class:`ModelEntry`. Values can also be assigned as dict and will automatically be converted into instances of :class:`ModelEntry`. From d68a9f8674f59fc9ae8b39bd7da971b91ee9706a Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 18 Nov 2024 08:24:11 +0100 Subject: [PATCH 9/9] add tests --- tests/aeroval/test_collections.py | 59 +++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/tests/aeroval/test_collections.py b/tests/aeroval/test_collections.py index b614ceabc..1327ca6b8 100644 --- a/tests/aeroval/test_collections.py +++ b/tests/aeroval/test_collections.py @@ -1,7 +1,10 @@ from pyaerocom.aeroval.collections import ObsCollection, ModelCollection +from pyaerocom.aeroval.obsentry import ObsEntry +from pyaerocom.aeroval.modelentry import ModelEntry +import pytest -def test_obscollection(): +def test_obscollection_init_and_add_entry(): oc = ObsCollection() oc.add_entry("model1", dict(obs_id="bla", obs_vars="od550aer", obs_vert_type="Column")) assert oc @@ -19,7 +22,42 @@ def test_obscollection(): assert "AN-EEA-MP" in oc.keylist() -def test_modelcollection(): +def test_obscollection_add_and_get_entry(): + collection = ObsCollection() + entry = ObsEntry(obs_id="obs1", obs_vars=("var1",)) + collection.add_entry("key1", entry) + retrieved_entry = collection.get_entry("key1") + assert retrieved_entry == entry + + +def test_obscollection_add_and_remove_entry(): + collection = ObsCollection() + entry = ObsEntry(obs_id="obs1", obs_vars=("var1",)) + collection.add_entry("key1", entry) + collection.remove_entry("key1") + with pytest.raises(KeyError): + collection.get_entry("key1") + + +def test_obscollection_get_web_interface_name(): + collection = ObsCollection() + entry = ObsEntry(obs_id="obs1", obs_vars=("var1",), web_interface_name="web_name") + collection.add_entry("key1", entry) + assert collection.get_web_interface_name("key1") == "web_name" + + +def test_obscollection_all_vert_types(): + collection = ObsCollection() + entry1 = ObsEntry( + obs_id="obs1", obs_vars=("var1",), obs_vert_type="Surface" + ) # Assuming ObsEntry has an obs_vert_type attribute + entry2 = ObsEntry(obs_id="obs2", obs_vars=("var2",), obs_vert_type="Profile") + collection.add_entry("key1", entry1) + collection.add_entry("key2", entry2) + assert set(collection.all_vert_types) == {"Surface", "Profile"} + + +def test_modelcollection_init_and_add_entry(): mc = ModelCollection() mc.add_entry("model1", dict(model_id="bla", obs_vars="od550aer", obs_vert_type="Column")) assert mc @@ -34,3 +72,20 @@ def test_modelcollection(): ) assert "ECMWF_OSUITE" in mc.keylist() + + +def test_modelcollection_add_and_get_entry(): + collection = ModelCollection() + entry = ModelEntry(model_id="mod1") + collection.add_entry("key1", entry) + retrieved_entry = collection.get_entry("key1") + assert retrieved_entry == entry + + +def test_modelcollection_add_and_remove_entry(): + collection = ModelCollection() + entry = ModelEntry(model_id="obs1") + collection.add_entry("key1", entry) + collection.remove_entry("key1") + with pytest.raises(KeyError): + collection.get_entry("key1")