From 5ac893ae93ecaa1d201ccbc92f178e43aef96b89 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 27 Mar 2024 14:29:07 +0000 Subject: [PATCH] Add fieldlist support (#15) * Add fieldlist support --- docs/conf.py | 14 ++ docs/examples/healpix_fieldlist.ipynb | 176 +------------------- docs/examples/octahedral_fieldlist.ipynb | 181 +-------------------- docs/gridspec.rst | 4 +- docs/index.rst | 12 +- docs/interpolate.rst | 23 +-- docs/release_notes/index.rst | 7 + docs/release_notes/version_0.3_updates.rst | 17 ++ earthkit/regrid/interpolate.py | 78 ++++++++- earthkit/regrid/sphinxext/xref.py | 48 ++++++ 10 files changed, 202 insertions(+), 358 deletions(-) create mode 100644 docs/release_notes/index.rst create mode 100644 docs/release_notes/version_0.3_updates.rst create mode 100644 earthkit/regrid/sphinxext/xref.py diff --git a/docs/conf.py b/docs/conf.py index e0581b6..e59cd41 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,6 +33,7 @@ extensions = [ "sphinx_rtd_theme", "nbsphinx", + "earthkit.regrid.sphinxext.xref", "earthkit.regrid.sphinxext.module_output", ] @@ -61,3 +62,16 @@ html_css_files = ["style.css"] # html_logo = "_static/earthkit-regrid.png" + + +xref_links = { + "earthkit": ("earthkit", "https://earthkit.readthedocs.io/en/latest/"), + "earthkit-data": ( + "earthkit-data", + "https://earthkit-data.readthedocs.io/en/latest/", + ), + "fieldlist": ( + "fieldlist", + "https://earthkit-data.readthedocs.io/en/latest/guide/data_format/grib.html", + ), +} diff --git a/docs/examples/healpix_fieldlist.ipynb b/docs/examples/healpix_fieldlist.ipynb index 83e1b54..22d7a0e 100644 --- a/docs/examples/healpix_fieldlist.ipynb +++ b/docs/examples/healpix_fieldlist.ipynb @@ -65,7 +65,7 @@ "source": [ "import os\n", "from earthkit.regrid import interpolate\n", - "from earthkit.data import from_source, FieldList\n", + "from earthkit.data import from_source\n", "\n", "# Get HEALPix nested GRIB data containing two fields.\n", "ds = from_source(\n", @@ -73,16 +73,11 @@ " \"https://get.ecmwf.int/repository/test-data/earthkit-regrid/examples/H8_nested_multi.grib2\")\n", "\n", "# the target grid is a global 5x5 degree regular latitude grid\n", - "target_gridspec = {\"grid\": [5,5]}\n", + "out_grid = {\"grid\": [5,5]}\n", "\n", "# perform interpolation for each field and add results \n", "# to a new fieldlist stored in memory\n", - "r = FieldList()\n", - "for f in ds:\n", - " v_res = interpolate(f.to_numpy(flatten=True), f.metadata().gridspec, target_gridspec,\n", - " method=\"linear\")\n", - " md_res = f.metadata().override(gridspec=target_gridspec)\n", - " r += ds.from_numpy(v_res, md_res)\n", + "r = interpolate(ds, out_grid=out_grid, method=\"linear\")\n", "\n", "d = r.data()\n", "lat = d[0]\n", @@ -541,19 +536,19 @@ " GRIB_subCentre: 0\n", " Conventions: CF-1.7\n", " institution: European Centre for Medium-Range Weather Forecasts\n", - " history: 2024-03-25T19:54 GRIB to CDM+CF via cfgrib-0.9.1...
  • GRIB_edition :
    2
    GRIB_centre :
    ecmf
    GRIB_centreDescription :
    European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre :
    0
    Conventions :
    CF-1.7
    institution :
    European Centre for Medium-Range Weather Forecasts
    history :
    2024-03-27T12:44 GRIB to CDM+CF via cfgrib-0.9.10.4/ecCodes-2.35.0 with {"source": "N/A", "filter_by_keys": {}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
  • " ], "text/plain": [ "\n", @@ -583,7 +578,7 @@ " GRIB_subCentre: 0\n", " Conventions: CF-1.7\n", " institution: European Centre for Medium-Range Weather Forecasts\n", - " history: 2024-03-25T19:54 GRIB to CDM+CF via cfgrib-0.9.1..." + " history: 2024-03-27T12:44 GRIB to CDM+CF via cfgrib-0.9.1..." ] }, "execution_count": 3, @@ -627,159 +622,6 @@ "out_file = \"_res_H8_nested_to_5x5.grib\"\n", "r.save(out_file)" ] - }, - { - "cell_type": "markdown", - "id": "fabd4b1a-1477-49ea-a7b8-e380a9859d80", - "metadata": {}, - "source": [ - "If we do not want to store the resulting fieldlist in memory we can write each field to disk as soon as it is created." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "fb07acc1-9cd2-4ca6-9981-fcbcde81ea30", - "metadata": {}, - "outputs": [], - "source": [ - "# ensure the output does not exist\n", - "if os.path.exists(out_file):\n", - " os.remove(out_file)\n", - " \n", - "for f in ds:\n", - " v_res = interpolate(f.to_numpy(), f.metadata().gridspec, target_gridspec)\n", - " md_res = f.metadata().override(gridspec=target_gridspec)\n", - " FieldList.from_numpy(v_res, md_res).save(out_file, append=True)" - ] - }, - { - "cell_type": "markdown", - "id": "bfc957d6-7dc8-49c2-af95-42de73d75fd4", - "metadata": {}, - "source": [ - "We can then read the data from disk as a fieldlist with lazy memory loading. This fieldlist can be used for inspection, plotting and xarray conversion in the same way as shown above." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2711bff9-6d50-4802-93fc-f26ff59518e3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
    0ecmf2theightAboveGround22024032312000fcNoneregular_ll
    1ecmf2theightAboveGround220240323120012fcNoneregular_ll
    \n", - "
    " - ], - "text/plain": [ - " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", - "0 ecmf 2t heightAboveGround 2 20240323 1200 0 \n", - "1 ecmf 2t heightAboveGround 2 20240323 1200 12 \n", - "\n", - " dataType number gridType \n", - "0 fc None regular_ll \n", - "1 fc None regular_ll " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds1 = from_source(\"file\", out_file)\n", - "ds1.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4a8249bd-6e56-4640-9306-a7824b89b550", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4, 37, 72)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d = ds1.data()\n", - "d.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b637624-4d65-44af-8b55-48e1952e92c7", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/examples/octahedral_fieldlist.ipynb b/docs/examples/octahedral_fieldlist.ipynb index 9dd4ceb..a99cc3d 100644 --- a/docs/examples/octahedral_fieldlist.ipynb +++ b/docs/examples/octahedral_fieldlist.ipynb @@ -59,7 +59,7 @@ "source": [ "import os\n", "from earthkit.regrid import interpolate\n", - "from earthkit.data import from_source, FieldList\n", + "from earthkit.data import from_source\n", "\n", "# Get octahedral reduced Gaussian GRIB data containing two fields.\n", "ds = from_source(\n", @@ -67,15 +67,11 @@ " \"https://get.ecmwf.int/repository/test-data/earthkit-regrid/examples/O32_multi.grib2\")\n", "\n", "# the target grid is a global 5x5 degree regular latitude grid\n", - "target_gridspec = {\"grid\": [5,5]}\n", + "out_grid = {\"grid\": [5,5]}\n", "\n", "# perform interpolation for each field and add results \n", "# to a new fieldlist stored in memory\n", - "r = FieldList()\n", - "for f in ds:\n", - " v_res = interpolate(f.to_numpy(), f.metadata().gridspec, target_gridspec)\n", - " md_res = f.metadata().override(gridspec=target_gridspec)\n", - " r += ds.from_numpy(v_res, md_res)\n", + "r = interpolate(ds, out_grid=out_grid, method=\"linear\")\n", "\n", "d = r.data()\n", "lat = d[0]\n", @@ -557,19 +553,19 @@ " GRIB_subCentre: 0\n", " Conventions: CF-1.7\n", " institution: European Centre for Medium-Range Weather Forecasts\n", - " history: 2024-03-25T12:04 GRIB to CDM+CF via cfgrib-0.9.1...
  • GRIB_edition :
    2
    GRIB_centre :
    ecmf
    GRIB_centreDescription :
    European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre :
    0
    Conventions :
    CF-1.7
    institution :
    European Centre for Medium-Range Weather Forecasts
    history :
    2024-03-27T12:39 GRIB to CDM+CF via cfgrib-0.9.10.4/ecCodes-2.35.0 with {"source": "N/A", "filter_by_keys": {}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
  • " ], "text/plain": [ "\n", @@ -599,7 +595,7 @@ " GRIB_subCentre: 0\n", " Conventions: CF-1.7\n", " institution: European Centre for Medium-Range Weather Forecasts\n", - " history: 2024-03-25T12:04 GRIB to CDM+CF via cfgrib-0.9.1..." + " history: 2024-03-27T12:39 GRIB to CDM+CF via cfgrib-0.9.1..." ] }, "execution_count": 4, @@ -637,165 +633,6 @@ "out_file = \"_res_O32_to_5x5.grib\"\n", "r.save(out_file)" ] - }, - { - "cell_type": "markdown", - "id": "738cc74a-e7f3-4fae-b40d-1a1cbeaf3097", - "metadata": {}, - "source": [ - "If we do not want to store the resulting fieldlist in memory we can write each field to disk as soon as it is created." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "831d257d-f5f0-4dcc-b31e-23ef642099d0", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# ensure the output does not exist\n", - "if os.path.exists(out_file):\n", - " os.remove(out_file)\n", - " \n", - "for f in ds:\n", - " v_res = interpolate(f.to_numpy(), f.metadata().gridspec, target_gridspec)\n", - " md_res = f.metadata().override(gridspec=target_gridspec)\n", - " FieldList.from_numpy(v_res, md_res).save(out_file, append=True)" - ] - }, - { - "cell_type": "markdown", - "id": "37d064c7-8eed-4c14-99d4-92e56bbc5ff8", - "metadata": {}, - "source": [ - "We can then read the data from disk as a fieldlist with lazy memory loading. This fieldlist can be used for inspection, plotting and xarray conversion in the same way as shown above." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b52059d6-c1a8-4d58-a809-4ed1b72c840e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
    0ecmf2theightAboveGround22024032312000fcNoneregular_ll
    1ecmf2theightAboveGround220240323120012fcNoneregular_ll
    \n", - "
    " - ], - "text/plain": [ - " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", - "0 ecmf 2t heightAboveGround 2 20240323 1200 0 \n", - "1 ecmf 2t heightAboveGround 2 20240323 1200 12 \n", - "\n", - " dataType number gridType \n", - "0 fc None regular_ll \n", - "1 fc None regular_ll " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds1 = from_source(\"file\", out_file)\n", - "ds1.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "ead700a4-b956-48d1-bb20-0f351ea72ec5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4, 37, 72)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d = ds1.data()\n", - "d.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bac4519-b990-4ac9-8ab1-391723c72150", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/gridspec.rst b/docs/gridspec.rst index 5e992d3..2979bc1 100644 --- a/docs/gridspec.rst +++ b/docs/gridspec.rst @@ -45,9 +45,9 @@ global regular latitude-longitude grid The ``grid`` format is:: - [DX, DY] + [DLON, DLAT] -where *DX* and *DY* are the increments in degrees in longitudes and latitudes, respectively. This grid definition uses the default scanning mode used at ECMWF: values start at North-West and follow in consecutive rows from West to East +where *DLON* and *DLAT* are the increments in degrees in longitudes and latitudes, respectively. This grid definition uses the default scanning mode used at ECMWF: values start at North-West and follow in consecutive rows from West to East, where West is always the 0° meridian. Example: diff --git a/docs/index.rst b/docs/index.rst index e9ae84d..d41876e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,7 +6,9 @@ Welcome to earthkit-regrids's documentation This project is **BETA** and will be **Experimental** for the foreseeable future. Interfaces and functionality are likely to change, and the project itself may be scrapped. **DO NOT** use this software in any project/software that is operational. -**earthkit-regrid** is a Python package for regridding. It features the :func:`interpolate` function to regrid values stored in an ndarray. At the moment, regridding is only available for a **pre-defined** set of source and target global grid combinations. +**earthkit-regrid** is a Python package for regridding. It is one of the components of :xref:`earthkit`. + +The **earthkit-regrid** API features the :func:`interpolate` function to regrid values stored in an ndarray or earthkit-data GRIB :xref:`fieldlist`. At the moment, regridding is only available for a **pre-defined** set of source and target global grid combinations. .. toctree:: @@ -29,10 +31,18 @@ Welcome to earthkit-regrids's documentation :caption: Installation install + release_notes/index development licence +.. toctree:: + :maxdepth: 1 + :caption: Projects + + earthkit + + Indices and tables ================== diff --git a/docs/interpolate.rst b/docs/interpolate.rst index 885bdf8..d76af35 100644 --- a/docs/interpolate.rst +++ b/docs/interpolate.rst @@ -1,25 +1,26 @@ interpolate ============== -.. py:function:: interpolate(values, source_gridspec, target_gridspec, matrix_source=None, method='linear') +.. py:function:: interpolate(values, in_grid=None, out_grid=None, matrix_source=None, method='linear') - Interpolate the ``values`` from the ``source_gridspec`` onto the ``target_gridspec``. + Interpolate the ``values`` from the ``in_grid`` onto the ``out_grid``. - :param values: the input values - :type values: ndarray - :param source_gridspec: the :ref:`gridspec ` describing the grid that ``values`` are defined on - :type source_gridspec: dict - :param target_gridspec: the :ref:`gridspec ` describing the target grid that ``values`` will be interpolated onto - :type target_gridspec: dict + :param values: the input data. It can be an ndarray or an earthkit-data GRIB :xref:`fieldlist`. ndarrays are assumed to be defined on the ``in_grid``. + :type values: ndarray, :xref:`fieldlist` + :param in_grid: the :ref:`gridspec ` describing the grid that ``values`` are defined on. When ``values`` is a :xref:`fieldlist` the input grid is automatically detected and ``in_grid`` cannot be specified. + :type in_grid: dict + :param out_grid: the :ref:`gridspec ` describing the target grid that ``values`` will be interpolated onto + :type out_grid: dict :param method: the interpolation method. Possible values are ``linear`` and ``nearest-neighbour``. For ``nearest-neighbour`` the following aliases are also supported: ``nn``, ``nearest-neighbor``. :type method: str :param matrix_source: (experimental) the location of a user specified pre-generated matrix inventory. When it is None the default matrix inventory hosted on an ECMWF download server is used. :type matrix_source: str, None - :return: the interpolated values - :rtype: ndarray + :return: The same type of data as ``values`` containing the interpolated values. + :rtype: ndarray, :xref:`fieldlist` :raises ValueError: if a pre-generated interpolation matrix is not available + :raises ValueError: if ``in_grid`` is specified for a :xref:`fieldlist` input - The interpolation only works when a pre-generated interpolation matrix is available for the given ``source_gridspec``, ``target_gridspec`` and ``method`` combination. + The interpolation only works when a pre-generated interpolation matrix is available for the given ``in_grid``, ``out_grid`` and ``method`` combination. When ``matrix_source`` is None (default) the interpolation matrix is automatically downloaded and stored in a local cache and when it is needed again the cached version is used. diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst new file mode 100644 index 0000000..206b2fb --- /dev/null +++ b/docs/release_notes/index.rst @@ -0,0 +1,7 @@ +Release notes +============== + +.. toctree:: + :maxdepth: 1 + + version_0.3_updates diff --git a/docs/release_notes/version_0.3_updates.rst b/docs/release_notes/version_0.3_updates.rst new file mode 100644 index 0000000..8b49b1f --- /dev/null +++ b/docs/release_notes/version_0.3_updates.rst @@ -0,0 +1,17 @@ +Version 0.3 Updates +///////////////////////// + + +Version 0.3.0 +=============== + + +New features +++++++++++++++++ +- restructured and regenerated matrix inventory +- allow using the ``method`` keyword in :func:`interpolate` to specify the interpolation method +- allow using earthkit-data GRIB :xref:`fieldlist` in :func:`interpolate` as input data. This only works when the output grid is regular latitude-longitude grid. +- added notebook examples: + + - :ref:`/examples/healpix_fieldlist.ipynb` + - :ref:`/examples/octahedral_fieldlist.ipynb` diff --git a/earthkit/regrid/interpolate.py b/earthkit/regrid/interpolate.py index 93523bb..8a01be5 100644 --- a/earthkit/regrid/interpolate.py +++ b/earthkit/regrid/interpolate.py @@ -10,13 +10,28 @@ from earthkit.regrid.db import find -def interpolate(values, source_gridspec, target_gridspec, method="linear", **kwargs): - z, shape = find(source_gridspec, target_gridspec, method, **kwargs) +def interpolate(values, in_grid=None, out_grid=None, method="linear", **kwargs): + interpolator = _find_interpolator(values) + if interpolator is None: + raise ValueError(f"Cannot interpolate data with type={type(values)}") + + return interpolator( + values, in_grid=in_grid, out_grid=out_grid, method=method, **kwargs + ) + + +def _find_interpolator(values): + for interpolator in INTERPOLATORS: + if interpolator.match(values): + return interpolator + return None + + +def _interpolate(values, in_grid, out_grid, method, **kwargs): + z, shape = find(in_grid, out_grid, method, **kwargs) if z is None: - raise ValueError( - f"No matrix found! {source_gridspec=} {target_gridspec=} {method=}" - ) + raise ValueError(f"No matrix found! {in_grid=} {out_grid=} {method=}") # This should check for 1D (GG) and 2D (LL) matrices values = values.reshape(-1, 1) @@ -24,3 +39,56 @@ def interpolate(values, source_gridspec, target_gridspec, method="linear", **kwa values = z @ values return values.reshape(shape) + + +class NumpyInterpolator: + @staticmethod + def match(values): + import numpy as np + + return isinstance(values, np.ndarray) + + def __call__(self, values, **kwargs): + in_grid = kwargs.pop("in_grid") + out_grid = kwargs.pop("out_grid") + method = kwargs.pop("method") + return _interpolate(values, in_grid, out_grid, method, **kwargs) + + +class FieldListInterpolator: + @staticmethod + def match(values): + try: + import earthkit.data + + return isinstance(values, earthkit.data.FieldList) + except ImportError: + return False + + def __call__(self, values, **kwargs): + import earthkit.data + + ds = values + in_grid = kwargs.pop("in_grid") + if in_grid is not None: + raise ValueError("in_grid cannot be used for FieldList interpolation") + out_grid = kwargs.pop("out_grid") + method = kwargs.pop("method") + + r = earthkit.data.FieldList() + for f in ds: + vv = f.to_numpy(flatten=True) + v_res = _interpolate( + vv, + f.metadata().gridspec, + out_grid, + method, + **kwargs, + ) + md_res = f.metadata().override(gridspec=out_grid) + r += ds.from_numpy(v_res, md_res) + + return r + + +INTERPOLATORS = [NumpyInterpolator(), FieldListInterpolator()] diff --git a/earthkit/regrid/sphinxext/xref.py b/earthkit/regrid/sphinxext/xref.py new file mode 100644 index 0000000..a144d60 --- /dev/null +++ b/earthkit/regrid/sphinxext/xref.py @@ -0,0 +1,48 @@ +# (C) Copyright 2022 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +# Taken from: https://github.com/michaeljones/sphinx-xref + +from docutils import nodes +from sphinx.util import caption_ref_re + + +def xref(typ, rawtext, text, lineno, inliner, options={}, content=[]): + title = target = text + + # look if explicit title and target are given with `foo ` syntax + brace = text.find("<") + if brace != -1: + m = caption_ref_re.match(text) + if m: + target = m.group(2) + title = m.group(1) + else: + # fallback: everything after '<' is the target + target = text[brace + 1 :] + title = text[:brace] + + link = xref.links[target] + + if brace != -1: + pnode = nodes.reference(target, title, refuri=link[1]) + else: + pnode = nodes.reference(target, link[0], refuri=link[1]) + + return [pnode], [] + + +def get_refs(app): + xref.links = app.config.xref_links + + +def setup(app): + app.add_config_value("xref_links", {}, True) + app.add_role("xref", xref) + app.connect("builder-inited", get_refs)