Skip to content

Commit

Permalink
added basic cmorizer class (#121)
Browse files Browse the repository at this point in the history
* added basic cmorizer class

* added preprocess

* updated write functions

* added cfinfo function

---------

Co-authored-by: Lars Buntemeyer <[email protected]>
  • Loading branch information
larsbuntemeyer and Lars Buntemeyer authored Jul 1, 2023
1 parent a081603 commit 155e498
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 1 deletion.
3 changes: 2 additions & 1 deletion cordex/cmor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .cmor import cmorize_variable, prepare_variable
from .cmor import Cmorizer, cmorize_variable, prepare_variable
from .config import options, set_options
from .utils import (
mid_of_month,
Expand Down Expand Up @@ -31,4 +31,5 @@
"to_cftime",
"set_options",
"options",
"Cmorizer",
]
189 changes: 189 additions & 0 deletions cordex/cmor/cmor.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,3 +639,192 @@ def cmorize_variable(
return cmorize_cmor(
ds_prep, out_name, cmor_table, dataset_table, grids_table, inpath
)


class CmorizerBase:
def __init__(self):
pass


class Cmorizer(CmorizerBase):
def __init__(
self,
dataset_table=None,
mapping_table=None,
grids_table=None,
inpath=None,
replace_coords=False,
allow_units_convert=False,
allow_resample=False,
input_freq=None,
CORDEX_domain=None,
time_units=None,
rewrite_time_axis=False,
outpath=None,
):
"""Cmorizes a variable.
Parameters
----------
dataset_table: str or dict
Dataset table dict of filepath to dataset cmor table (json).
mapping_table: dict
Mapping of input variable names and meta data to CF out_name. Required if
the variable name in the input dataset is not equal to out_name.
inpath: str
Path to cmor tables, if ``inpath == None``, inpath is the path
to ``cmor_table``. This is required to find additional cmor tables,
like ``CMIP6_coordinates``, ``CMIP6_grids`` etc.
grids_table: str
Filepath to cmor grids table.
replace_coords: bool
Replace coordinates from input file and create them from archive
specifications.
allow_units_convert: bool
Allow units to be converted if they do not agree with the
units in the cmor table. Defaults to ``False`` to make the user aware of having
correct ``units`` attributes set.
allow_resample: bool
Allow to resample temporal data to the frequency required from the cmor table.
Handles both downsampling and upsampling. Defaults to ``False`` to make users aware
of the correct frequency input.
input_freq: str
The frequency of the input dataset in pandas notation. It ``None`` and the dataset
contains a time axis, the frequency will be determined automatically using
``pandas.infer_freq`` if possible.
CORDEX_domain: str
Cordex domain short name. If ``None``, the domain will be determined by the ``CORDEX_domain``
global attribute if available.
time_units: str
Time units of the cmorized dataset (``ISO 8601``).
If ``None``, time units will be set to default (``"days since 1950-01-01T00:00:00"``).
If ``time_units='input'``, the original time units of the input dataset are used.
rewrite_time_axis: bool
Rewrite the time axis to CF compliant timestamps.
outpath: str
Root directory for output (can be either a relative or full path). This will override
the outpath defined in the dataset cmor input table (``dataset_table``).
"""
super().__init__()
self.dataset_table = dataset_table
self.mapping_table = mapping_table
self.grids_table = grids_table
self.inpath = inpath
self.replace_coords = replace_coords
self.allow_units_convert = allow_units_convert
self.allow_resample = allow_resample
self.CORDEX_domain = CORDEX_domain
self.time_units = time_units
self.rewrite_time_axis = rewrite_time_axis
self.outpath = outpath

if op.isfile(self.dataset_table):
self.dataset_table = _read_table(self.dataset_table)

if outpath:
self.dataset_table["outpath"] = outpath

def preprocess(
self,
ds,
out_name,
cmor_table,
replace_coords=False,
allow_units_convert=False,
allow_resample=False,
input_freq=None,
CORDEX_domain=None,
time_units=None,
rewrite_time_axis=False,
use_cftime=False,
squeeze=True,
):
"""prepares a variable for cmorization."""
return prepare_variable(
ds,
out_name,
cmor_table,
mapping_table=self.mapping_table,
replace_coords=replace_coords or self.replace_coords,
allow_units_convert=allow_units_convert or self.allow_units_convert,
allow_resample=allow_resample or self.allow_resample,
input_freq=input_freq,
CORDEX_domain=CORDEX_domain or self.CORDEX_domain,
time_units=time_units or self.time_units,
rewrite_time_axis=rewrite_time_axis or self.rewrite_time_axis,
use_cftime=use_cftime,
squeeze=squeeze,
)

def _write_with_cmor(self, ds, out_name, cmor_table):
return cmorize_cmor(
ds, out_name, cmor_table, self.dataset_table, self.grids_table, self.inpath
)

def cmorize(self, ds, out_name, cmor_table):
"""Cmorizes a variable.
Parameters
----------
ds : xr.Dataset
Dataset containing at least the variable that should be cmorized.
out_name: str
CF out_name of the variable that should be cmorized. The corresponding variable name
in the dataset is looked up from the mapping_table if provided.
cmor_table : str or dict
Cmor table dict of filepath to cmor table (json).
dataset_table: str or dict
Dataset table dict of filepath to dataset cmor table (json).
mapping_table: dict
Mapping of input variable names and meta data to CF out_name. Required if
the variable name in the input dataset is not equal to out_name.
grids_table: str
Filepath to cmor grids table.
inpath: str
Path to cmor tables, if ``inpath == None``, inpath is the path
to ``cmor_table``. This is required to find additional cmor tables,
like ``CMIP6_coordinates``, ``CMIP6_grids`` etc.
replace_coords: bool
Replace coordinates from input file and create them from archive
specifications.
allow_units_convert: bool
Allow units to be converted if they do not agree with the
units in the cmor table. Defaults to ``False`` to make the user aware of having
correct ``units`` attributes set.
allow_resample: bool
Allow to resample temporal data to the frequency required from the cmor table.
Handles both downsampling and upsampling. Defaults to ``False`` to make users aware
of the correct frequency input.
input_freq: str
The frequency of the input dataset in pandas notation. It ``None`` and the dataset
contains a time axis, the frequency will be determined automatically using
``pandas.infer_freq`` if possible.
CORDEX_domain: str
Cordex domain short name. If ``None``, the domain will be determined by the ``CORDEX_domain``
global attribute if available.
time_units: str
Time units of the cmorized dataset (``ISO 8601``).
If ``None``, time units will be set to default (``"days since 1950-01-01T00:00:00"``).
If ``time_units='input'``, the original time units of the input dataset are used.
rewrite_time_axis: bool
Rewrite the time axis to CF compliant timestamps.
outpath: str
Root directory for output (can be either a relative or full path). This will override
the outpath defined in the dataset cmor input table (``dataset_table``).
**kwargs:
Argumets passed to prepare_variable.
Returns
-------
filename
Filepath to cmorized file.
"""
ds_prep = self.preprocess(ds, out_name, cmor_table)
return self._write_with_cmor(ds_prep, out_name, cmor_table)

def cfinfo(self, out_name, cmor_table):
if isinstance(cmor_table, str):
cmor_table = _read_table(cmor_table)
return _get_cfvarinfo(out_name, cmor_table)

0 comments on commit 155e498

Please sign in to comment.