From 0f123a86b805d2a8e9e46c0ec57281a2038f95a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:03:06 +0200 Subject: [PATCH 01/14] [pre-commit.ci] pre-commit autoupdate (#25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black-pre-commit-mirror: 24.4.2 → 24.8.0](https://github.com/psf/black-pre-commit-mirror/compare/24.4.2...24.8.0) - [github.com/astral-sh/ruff-pre-commit: v0.4.6 → v0.6.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.6...v0.6.2) - [github.com/tox-dev/pyproject-fmt: 2.1.3 → 2.2.1](https://github.com/tox-dev/pyproject-fmt/compare/2.1.3...2.2.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4b6367..9dc25d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: - id: check-added-large-files # Check for large files added to git - id: check-merge-conflict # Check for files that contain merge conflict - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.4.2 + rev: 24.8.0 hooks: - id: black args: [--line-length=120] @@ -34,7 +34,7 @@ repos: - --force-single-line-imports - --profile black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.6 + rev: v0.6.2 hooks: - id: ruff # Next line if for documenation cod snippets @@ -65,6 +65,6 @@ repos: - id: optional-dependencies-all args: ["--inplace", "--exclude-keys=dev,docs,tests", "--group=dev=all,docs,tests"] - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.1.3" + rev: "2.2.1" hooks: - id: pyproject-fmt From 5525e7a7c263b69e9809c6a9683505e26515da4a Mon Sep 17 00:00:00 2001 From: Helen Theissen Date: Tue, 27 Aug 2024 16:03:30 +0100 Subject: [PATCH 02/14] Ci/changelog-release-updater (#26) * ci: add changelof release updater * docs: update changelog --- .../workflows/changelog-release-update.yml | 34 +++++++++++++++++++ CHANGELOG.md | 1 + 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/changelog-release-update.yml diff --git a/.github/workflows/changelog-release-update.yml b/.github/workflows/changelog-release-update.yml new file mode 100644 index 0000000..79b85ad --- /dev/null +++ b/.github/workflows/changelog-release-update.yml @@ -0,0 +1,34 @@ +# .github/workflows/update-changelog.yaml +name: "Update Changelog" + +on: + release: + types: [released] + +permissions: + pull-requests: write + contents: write + +jobs: + update: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ github.event.release.target_commitish }} + + - name: Update Changelog + uses: stefanzweifel/changelog-updater-action@v1 + with: + latest-version: ${{ github.event.release.tag_name }} + heading-text: ${{ github.event.release.name }} + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v6 + with: + branch: docs/changelog-update-${{ github.event.release.tag_name }} + title: '[Changelog] Update to ${{ github.event.release.tag_name }}' + add-paths: | + CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c0b75f0..73713be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Keep it human-readable, your future self will thank you! 
## [Unreleased] ### Added + - CI workflow to update the changelog on release ### Changed From b34828b24586c6d5c4574f6501f67a633478456c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:31:04 +0200 Subject: [PATCH 03/14] [pre-commit.ci] pre-commit autoupdate (#27) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.2 → v0.6.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.2...v0.6.3) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9dc25d3..c042b1f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: - --force-single-line-imports - --profile black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.2 + rev: v0.6.3 hooks: - id: ruff # Next line if for documenation cod snippets From 5c75822932ef688fc3c823166f028931da79d5c1 Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Thu, 5 Sep 2024 16:28:26 +0200 Subject: [PATCH 04/14] feat: make model instantiateable (#29) * feat: make model instantiateable * docs: instantiation explained in changelog --- CHANGELOG.md | 1 + src/anemoi/models/interface/__init__.py | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73713be..d9737b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Keep it human-readable, your future self will thank you! - Update CI to inherit from common infrastructue reusable workflows - run downstream-ci only when src and tests folders have changed - New error messages for wrongs graphs. + - Feature: Change model to be instantiatable in the interface, addressing [#28](https://github.com/ecmwf/anemoi-models/issues/28) through [#29](https://github.com/ecmwf/anemoi-models/pulls/29) ### Removed diff --git a/src/anemoi/models/interface/__init__.py b/src/anemoi/models/interface/__init__.py index 54c548d..546003a 100644 --- a/src/anemoi/models/interface/__init__.py +++ b/src/anemoi/models/interface/__init__.py @@ -14,7 +14,6 @@ from hydra.utils import instantiate from torch_geometric.data import HeteroData -from anemoi.models.models.encoder_processor_decoder import AnemoiModelEncProcDec from anemoi.models.preprocessing import Processors @@ -73,9 +72,9 @@ def _build_model(self) -> None: self.pre_processors = Processors(processors) self.post_processors = Processors(processors, inverse=True) - # Instantiate the model (Can be generalised to other models in the future, here we use AnemoiModelEncProcDec) - self.model = AnemoiModelEncProcDec( - config=self.config, data_indices=self.data_indices, graph_data=self.graph_data + # Instantiate the model + self.model = instantiate( + self.config.model.model, config=self.config, data_indices=self.data_indices, graph_data=self.graph_data ) # Use the forward method of the model directly From 4f1de8148049bcb41a1ed22ffb591a13f15bb75c Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Fri, 6 Sep 2024 16:05:57 +0200 Subject: [PATCH 05/14] Revert "feat: make model instantiateable (#29)" (#31) This reverts commit 5c75822932ef688fc3c823166f028931da79d5c1. 
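For reference, the reverted change swapped the hard-coded `AnemoiModelEncProcDec` constructor call for a config-driven `hydra.utils.instantiate` call, as the diff below shows being undone. A minimal, self-contained sketch of that instantiation pattern follows; the `ToyModel` class and the config layout are illustrative assumptions, not the anemoi-models config schema:

```python
# Sketch of the config-driven instantiation pattern being reverted here.
# Hydra resolves the dotted path in "_target_" and calls it with the
# remaining keys as keyword arguments; anemoi-models would pass
# config=..., data_indices=..., graph_data=... instead of hidden_dim.
from hydra.utils import instantiate
from omegaconf import OmegaConf


class ToyModel:
    def __init__(self, hidden_dim: int) -> None:
        self.hidden_dim = hidden_dim


config = OmegaConf.create({"model": {"_target_": "__main__.ToyModel", "hidden_dim": 64}})
model = instantiate(config.model)  # equivalent to ToyModel(hidden_dim=64)
print(type(model).__name__, model.hidden_dim)  # ToyModel 64
```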
--- CHANGELOG.md | 1 - src/anemoi/models/interface/__init__.py | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9737b8..73713be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,6 @@ Keep it human-readable, your future self will thank you! - Update CI to inherit from common infrastructue reusable workflows - run downstream-ci only when src and tests folders have changed - New error messages for wrongs graphs. - - Feature: Change model to be instantiatable in the interface, addressing [#28](https://github.com/ecmwf/anemoi-models/issues/28) through [#29](https://github.com/ecmwf/anemoi-models/pulls/29) ### Removed diff --git a/src/anemoi/models/interface/__init__.py b/src/anemoi/models/interface/__init__.py index 546003a..54c548d 100644 --- a/src/anemoi/models/interface/__init__.py +++ b/src/anemoi/models/interface/__init__.py @@ -14,6 +14,7 @@ from hydra.utils import instantiate from torch_geometric.data import HeteroData +from anemoi.models.models.encoder_processor_decoder import AnemoiModelEncProcDec from anemoi.models.preprocessing import Processors @@ -72,9 +73,9 @@ def _build_model(self) -> None: self.pre_processors = Processors(processors) self.post_processors = Processors(processors, inverse=True) - # Instantiate the model - self.model = instantiate( - self.config.model.model, config=self.config, data_indices=self.data_indices, graph_data=self.graph_data + # Instantiate the model (Can be generalised to other models in the future, here we use AnemoiModelEncProcDec) + self.model = AnemoiModelEncProcDec( + config=self.config, data_indices=self.data_indices, graph_data=self.graph_data ) # Use the forward method of the model directly From 80787cec6591a626fa69c50d53110be843d69902 Mon Sep 17 00:00:00 2001 From: "S. Hahner" <44293258+sahahner@users.noreply.github.com> Date: Mon, 9 Sep 2024 17:05:02 +0200 Subject: [PATCH 06/14] 15 remapping of one input variable to multiple new ones (#21) * feat: remapper and change to data indices when mapping one variable to several * tests: update imputer and normalizer tests * feat: include remapper as a preprocessor. update init for all preprocessors. add tests for the remapper. * tests: update tests for index collection * documentation and changelog * feat: enable remapper for forcing variables * tests: include remapping forcing variable and do not test with remapping variables at the end * comment and warning about using in_place=True in remapper as this is not possible * comments: incorporate changes/documentation requested by jesper * change order of function inputs preprocessors, documentation for data indices and remapper * style: dict in config files for defining the variables to be remapped. structure and additional assert in index collection. 
* args in preprocessors --- CHANGELOG.md | 2 + docs/modules/data_indices.rst | 25 +- docs/modules/preprocessing.rst | 13 + src/anemoi/models/data_indices/collection.py | 61 +++- src/anemoi/models/interface/__init__.py | 2 +- .../models/encoder_processor_decoder.py | 19 +- src/anemoi/models/preprocessing/__init__.py | 12 +- src/anemoi/models/preprocessing/imputer.py | 16 +- src/anemoi/models/preprocessing/normalizer.py | 8 +- src/anemoi/models/preprocessing/remapper.py | 300 ++++++++++++++++++ tests/data_indices/test_collection.py | 136 ++++++-- .../test_preprocessor_imputer.py | 6 +- .../test_preprocessor_normalizer.py | 3 +- .../test_preprocessor_remapper.py | 69 ++++ 14 files changed, 620 insertions(+), 52 deletions(-) create mode 100644 src/anemoi/models/preprocessing/remapper.py create mode 100644 tests/preprocessing/test_preprocessor_remapper.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 73713be..9da050f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ Keep it human-readable, your future self will thank you! ### Added - CI workflow to update the changelog on release +- Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. + ### Changed - Update CI to inherit from common infrastructue reusable workflows diff --git a/docs/modules/data_indices.rst b/docs/modules/data_indices.rst index 8fbff4d..d28fa04 100644 --- a/docs/modules/data_indices.rst +++ b/docs/modules/data_indices.rst @@ -45,12 +45,33 @@ config entry: :alt: Schematic of IndexCollection with Data Indexing on Data and Model levels. :align: center -The are two Index-levels: +Additionally, prognostic and forcing variables can be remapped and +converted to multiple variables. The conversion is then done by the +remapper-preprocessor. + +.. code:: yaml + + data: + remapped: + - d: + - "d_1" + - "d_2" + +There are two main Index-levels: - Data: The data at "Zarr"-level provided by Anemoi-Datasets - Model: The "squeezed" tensors with irrelevant parts missing. -These are both split into two versions: +Additionally, there are two internal model levels (After preprocessor +and before postprocessor) that are necessary because of the possiblity +to remap variables to multiple variables. + +- Internal Data: Variables from Data-level that are used internally in + the model, but not exposed to the user. +- Internal Model: Variables from Model-level that are used internally + in the model, but not exposed to the user. + +All indices at the different levels are split into two versions: - Input: The data going into training / model - Output: The data produced by training / model diff --git a/docs/modules/preprocessing.rst b/docs/modules/preprocessing.rst index e68898f..e1eab7f 100644 --- a/docs/modules/preprocessing.rst +++ b/docs/modules/preprocessing.rst @@ -33,3 +33,16 @@ following classes: :members: :no-undoc-members: :show-inheritance: + +********** + Remapper +********** + +The remapper module is used to remap one variable to multiple other +variables that have been listed in data.remapped:. The module contains +the following classes: + +.. 
automodule:: anemoi.models.preprocessing.remapper + :members: + :no-undoc-members: + :show-inheritance: diff --git a/src/anemoi/models/data_indices/collection.py b/src/anemoi/models/data_indices/collection.py index 3e325aa..0d1031b 100644 --- a/src/anemoi/models/data_indices/collection.py +++ b/src/anemoi/models/data_indices/collection.py @@ -25,26 +25,74 @@ class IndexCollection: def __init__(self, config, name_to_index) -> None: self.config = OmegaConf.to_container(config, resolve=True) - + self.name_to_index = dict(sorted(name_to_index.items(), key=operator.itemgetter(1))) self.forcing = [] if config.data.forcing is None else OmegaConf.to_container(config.data.forcing, resolve=True) self.diagnostic = ( [] if config.data.diagnostic is None else OmegaConf.to_container(config.data.diagnostic, resolve=True) ) + # config.data.remapped is a list of diccionaries: every remapper is one entry of the list + self.remapped = ( + dict() if config.data.remapped is None else OmegaConf.to_container(config.data.remapped, resolve=True) + ) + self.forcing_remapped = self.forcing.copy() assert set(self.diagnostic).isdisjoint(self.forcing), ( f"Diagnostic and forcing variables overlap: {set(self.diagnostic).intersection(self.forcing)}. ", "Please drop them at a dataset-level to exclude them from the training data.", ) - self.name_to_index = dict(sorted(name_to_index.items(), key=operator.itemgetter(1))) + assert set(self.remapped).isdisjoint(self.diagnostic), ( + "Remapped variable overlap with diagnostic variables. Not implemented.", + ) + assert set(self.remapped).issubset(self.name_to_index), ( + "Remapping a variable that does not exist in the dataset. Check for typos: ", + f"{set(self.remapped).difference(self.name_to_index)}", + ) name_to_index_model_input = { name: i for i, name in enumerate(key for key in self.name_to_index if key not in self.diagnostic) } name_to_index_model_output = { name: i for i, name in enumerate(key for key in self.name_to_index if key not in self.forcing) } + # remove remapped variables from internal data and model indices + name_to_index_internal_data_input = { + name: i for i, name in enumerate(key for key in self.name_to_index if key not in self.remapped) + } + name_to_index_internal_model_input = { + name: i for i, name in enumerate(key for key in name_to_index_model_input if key not in self.remapped) + } + name_to_index_internal_model_output = { + name: i for i, name in enumerate(key for key in name_to_index_model_output if key not in self.remapped) + } + # for all variables to be remapped we add the resulting remapped variables to the end of the tensors + # keep track of that in the index collections + for key in self.remapped: + for mapped in self.remapped[key]: + # add index of remapped variables to dictionary + name_to_index_internal_model_input[mapped] = len(name_to_index_internal_model_input) + name_to_index_internal_data_input[mapped] = len(name_to_index_internal_data_input) + if key not in self.forcing: + # do not include forcing variables in the remapped model output + name_to_index_internal_model_output[mapped] = len(name_to_index_internal_model_output) + else: + # add remapped forcing variables to forcing_remapped + self.forcing_remapped += [mapped] + if key in self.forcing: + # if key is in forcing we need to remove it from forcing_remapped after remapped variables have been added + self.forcing_remapped.remove(key) self.data = DataIndex(self.diagnostic, self.forcing, self.name_to_index) + self.internal_data = DataIndex( + self.diagnostic, + 
self.forcing_remapped, + name_to_index_internal_data_input, + ) # internal after the remapping applied to data (training) self.model = ModelIndex(self.diagnostic, self.forcing, name_to_index_model_input, name_to_index_model_output) + self.internal_model = ModelIndex( + self.diagnostic, + self.forcing_remapped, + name_to_index_internal_model_input, + name_to_index_internal_model_output, + ) # internal after the remapping applied to model (inference) def __repr__(self) -> str: return f"IndexCollection(config={self.config}, name_to_index={self.name_to_index})" @@ -54,7 +102,12 @@ def __eq__(self, other): # don't attempt to compare against unrelated types return NotImplemented - return self.model == other.model and self.data == other.data + return ( + self.model == other.model + and self.data == other.data + and self.internal_model == other.internal_model + and self.internal_data == other.internal_data + ) def __getitem__(self, key): return getattr(self, key) @@ -63,6 +116,8 @@ def todict(self): return { "data": self.data.todict(), "model": self.model.todict(), + "internal_model": self.internal_model.todict(), + "internal_data": self.internal_data.todict(), } @staticmethod diff --git a/src/anemoi/models/interface/__init__.py b/src/anemoi/models/interface/__init__.py index 54c548d..626940f 100644 --- a/src/anemoi/models/interface/__init__.py +++ b/src/anemoi/models/interface/__init__.py @@ -65,7 +65,7 @@ def _build_model(self) -> None: """Builds the model and pre- and post-processors.""" # Instantiate processors processors = [ - [name, instantiate(processor, statistics=self.statistics, data_indices=self.data_indices)] + [name, instantiate(processor, data_indices=self.data_indices, statistics=self.statistics)] for name, processor in self.config.data.processors.items() ] diff --git a/src/anemoi/models/models/encoder_processor_decoder.py b/src/anemoi/models/models/encoder_processor_decoder.py index 0f37474..3414dc5 100644 --- a/src/anemoi/models/models/encoder_processor_decoder.py +++ b/src/anemoi/models/models/encoder_processor_decoder.py @@ -104,22 +104,23 @@ def __init__( ) def _calculate_shapes_and_indices(self, data_indices: dict) -> None: - self.num_input_channels = len(data_indices.model.input) - self.num_output_channels = len(data_indices.model.output) - self._internal_input_idx = data_indices.model.input.prognostic - self._internal_output_idx = data_indices.model.output.prognostic + self.num_input_channels = len(data_indices.internal_model.input) + self.num_output_channels = len(data_indices.internal_model.output) + self._internal_input_idx = data_indices.internal_model.input.prognostic + self._internal_output_idx = data_indices.internal_model.output.prognostic def _assert_matching_indices(self, data_indices: dict) -> None: - assert len(self._internal_output_idx) == len(data_indices.model.output.full) - len( - data_indices.model.output.diagnostic + assert len(self._internal_output_idx) == len(data_indices.internal_model.output.full) - len( + data_indices.internal_model.output.diagnostic ), ( - f"Mismatch between the internal data indices ({len(self._internal_output_idx)}) and the output indices excluding " - f"diagnostic variables ({len(data_indices.model.output.full) - len(data_indices.model.output.diagnostic)})", + f"Mismatch between the internal data indices ({len(self._internal_output_idx)}) and " + f"the internal output indices excluding diagnostic variables " + f"({len(data_indices.internal_model.output.full) - len(data_indices.internal_model.output.diagnostic)})", ) assert 
len(self._internal_input_idx) == len( self._internal_output_idx, - ), f"Model indices must match {self._internal_input_idx} != {self._internal_output_idx}" + ), f"Internal model indices must match {self._internal_input_idx} != {self._internal_output_idx}" def _define_tensor_sizes(self, config: DotDict) -> None: self._data_grid_size = self._graph_data[self._graph_name_data].num_nodes diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py index 081afaf..1368046 100644 --- a/src/anemoi/models/preprocessing/__init__.py +++ b/src/anemoi/models/preprocessing/__init__.py @@ -8,12 +8,16 @@ # import logging +from typing import TYPE_CHECKING from typing import Optional import torch from torch import Tensor from torch import nn +if TYPE_CHECKING: + from anemoi.models.data_indices.collection import IndexCollection + LOGGER = logging.getLogger(__name__) @@ -23,19 +27,19 @@ class BasePreprocessor(nn.Module): def __init__( self, config=None, + data_indices: Optional[IndexCollection] = None, statistics: Optional[dict] = None, - data_indices: Optional[dict] = None, ) -> None: """Initialize the preprocessor. Parameters ---------- config : DotDict - configuration object + configuration object of the processor + data_indices : IndexCollection + Data indices for input and output variables statistics : dict Data statistics dictionary - data_indices : dict - Data indices for input and output variables """ super().__init__() diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py index a7b0a8a..6ef5adb 100644 --- a/src/anemoi/models/preprocessing/imputer.py +++ b/src/anemoi/models/preprocessing/imputer.py @@ -33,16 +33,15 @@ def __init__( Parameters ---------- config : DotDict - configuration object + configuration object of the processor + data_indices : IndexCollection + Data indices for input and output variables statistics : dict Data statistics dictionary - data_indices : dict - Data indices for input and output variables """ - super().__init__(config, statistics, data_indices) + super().__init__(config, data_indices, statistics) self.nan_locations = None - self.data_indices = data_indices def _validate_indices(self): assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.replacement), ( @@ -174,8 +173,8 @@ class InputImputer(BaseImputer): def __init__( self, config=None, + data_indices: Optional[IndexCollection] = None, statistics: Optional[dict] = None, - data_indices: Optional[dict] = None, ) -> None: super().__init__(config, data_indices, statistics) @@ -201,7 +200,10 @@ class ConstantImputer(BaseImputer): """ def __init__( - self, config=None, statistics: Optional[dict] = None, data_indices: Optional[IndexCollection] = None + self, + config=None, + data_indices: Optional[IndexCollection] = None, + statistics: Optional[dict] = None, ) -> None: super().__init__(config, data_indices, statistics) diff --git a/src/anemoi/models/preprocessing/normalizer.py b/src/anemoi/models/preprocessing/normalizer.py index 5bb97ee..bc75466 100644 --- a/src/anemoi/models/preprocessing/normalizer.py +++ b/src/anemoi/models/preprocessing/normalizer.py @@ -34,13 +34,13 @@ def __init__( Parameters ---------- config : DotDict - configuration object + configuration object of the processor + data_indices : IndexCollection + Data indices for input and output variables statistics : dict Data statistics dictionary - data_indices : dict - Data indices for input and output variables """ - super().__init__(config, 
statistics, data_indices) + super().__init__(config, data_indices, statistics) name_to_index_training_input = self.data_indices.data.input.name_to_index diff --git a/src/anemoi/models/preprocessing/remapper.py b/src/anemoi/models/preprocessing/remapper.py new file mode 100644 index 0000000..a79e2af --- /dev/null +++ b/src/anemoi/models/preprocessing/remapper.py @@ -0,0 +1,300 @@ +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import logging +from abc import ABC +from typing import Optional + +import torch + +from anemoi.models.data_indices.collection import IndexCollection +from anemoi.models.preprocessing import BasePreprocessor + +LOGGER = logging.getLogger(__name__) + + +def cos_converter(x): + """Convert angle in degree to cos.""" + return torch.cos(x / 180 * torch.pi) + + +def sin_converter(x): + """Convert angle in degree to sin.""" + return torch.sin(x / 180 * torch.pi) + + +def atan2_converter(x): + """Convert cos and sin to angle in degree. + + Input: + x[..., 0]: cos + x[..., 1]: sin + """ + return torch.remainder(torch.atan2(x[..., 1], x[..., 0]) * 180 / torch.pi, 360) + + +class BaseRemapperVariable(BasePreprocessor, ABC): + """Base class for Remapping Variables.""" + + def __init__( + self, + config=None, + data_indices: Optional[IndexCollection] = None, + statistics: Optional[dict] = None, + ) -> None: + """Initialize the remapper. + + Parameters + ---------- + config : DotDict + configuration object of the processor + data_indices : IndexCollection + Data indices for input and output variables + statistics : dict + Data statistics dictionary + """ + super().__init__(config, data_indices, statistics) + + def _validate_indices(self): + assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.remappers), ( + f"Error creating conversion indices {len(self.index_training_input)}, " + f"{len(self.index_inference_input)}, {len(self.remappers)}" + ) + assert len(self.index_training_output) == len(self.index_inference_output) <= len(self.remappers), ( + f"Error creating conversion indices {len(self.index_training_output)}, " + f"{len(self.index_inference_output)}, {len(self.remappers)}" + ) + assert len(set(self.index_training_input + self.indices_keep_training_input)) == self.num_training_input_vars, ( + "Error creating conversion indices: variables remapped in config.data.remapped " + "that have no remapping function defined. Preprocessed tensors contains empty columns." 
+ ) + + def _create_remapping_indices( + self, + statistics=None, + ): + """Create the parameter indices for remapping.""" + # list for training and inference mode as position of parameters can change + name_to_index_training_input = self.data_indices.data.input.name_to_index + name_to_index_inference_input = self.data_indices.model.input.name_to_index + name_to_index_training_remapped_input = self.data_indices.internal_data.input.name_to_index + name_to_index_inference_remapped_input = self.data_indices.internal_model.input.name_to_index + name_to_index_training_remapped_output = self.data_indices.internal_data.output.name_to_index + name_to_index_inference_remapped_output = self.data_indices.internal_model.output.name_to_index + name_to_index_training_output = self.data_indices.data.output.name_to_index + name_to_index_inference_output = self.data_indices.model.output.name_to_index + + self.num_training_input_vars = len(name_to_index_training_input) + self.num_inference_input_vars = len(name_to_index_inference_input) + self.num_remapped_training_input_vars = len(name_to_index_training_remapped_input) + self.num_remapped_inference_input_vars = len(name_to_index_inference_remapped_input) + self.num_remapped_training_output_vars = len(name_to_index_training_remapped_output) + self.num_remapped_inference_output_vars = len(name_to_index_inference_remapped_output) + self.num_training_output_vars = len(name_to_index_training_output) + self.num_inference_output_vars = len(name_to_index_inference_output) + self.indices_keep_training_input = [] + for key, item in self.data_indices.data.input.name_to_index.items(): + if key in self.data_indices.internal_data.input.name_to_index: + self.indices_keep_training_input.append(item) + self.indices_keep_inference_input = [] + for key, item in self.data_indices.model.input.name_to_index.items(): + if key in self.data_indices.internal_model.input.name_to_index: + self.indices_keep_inference_input.append(item) + self.indices_keep_training_output = [] + for key, item in self.data_indices.data.output.name_to_index.items(): + if key in self.data_indices.internal_data.output.name_to_index: + self.indices_keep_training_output.append(item) + self.indices_keep_inference_output = [] + for key, item in self.data_indices.model.output.name_to_index.items(): + if key in self.data_indices.internal_model.output.name_to_index: + self.indices_keep_inference_output.append(item) + + ( + self.index_training_input, + self.index_training_remapped_input, + self.index_inference_input, + self.index_inference_remapped_input, + self.index_training_output, + self.index_training_backmapped_output, + self.index_inference_output, + self.index_inference_backmapped_output, + self.remappers, + self.backmappers, + ) = ([], [], [], [], [], [], [], [], [], []) + + # Create parameter indices for remapping variables + for name in name_to_index_training_input: + + method = self.methods.get(name, self.default) + + if method == "none": + continue + + if method == "cos_sin": + self.index_training_input.append(name_to_index_training_input[name]) + self.index_training_output.append(name_to_index_training_output[name]) + self.index_inference_input.append(name_to_index_inference_input[name]) + if name in name_to_index_inference_output: + self.index_inference_output.append(name_to_index_inference_output[name]) + else: + # this is a forcing variable. It is not in the inference output. 
+ self.index_inference_output.append(None) + multiple_training_output, multiple_inference_output = [], [] + multiple_training_input, multiple_inference_input = [], [] + for name_dst in self.method_config[method][name]: + assert name_dst in self.data_indices.internal_data.input.name_to_index, ( + f"Trying to remap {name} to {name_dst}, but {name_dst} not a variable. " + f"Remap {name} to {name_dst} in config.data.remapped. " + ) + multiple_training_input.append(name_to_index_training_remapped_input[name_dst]) + multiple_training_output.append(name_to_index_training_remapped_output[name_dst]) + multiple_inference_input.append(name_to_index_inference_remapped_input[name_dst]) + if name_dst in name_to_index_inference_remapped_output: + multiple_inference_output.append(name_to_index_inference_remapped_output[name_dst]) + else: + # this is a forcing variable. It is not in the inference output. + multiple_inference_output.append(None) + + self.index_training_remapped_input.append(multiple_training_input) + self.index_inference_remapped_input.append(multiple_inference_input) + self.index_training_backmapped_output.append(multiple_training_output) + self.index_inference_backmapped_output.append(multiple_inference_output) + + self.remappers.append([cos_converter, sin_converter]) + self.backmappers.append(atan2_converter) + + LOGGER.info(f"Map {name} to cosine and sine and save result in {self.method_config[method][name]}.") + + else: + raise ValueError[f"Unknown remapping method for {name}: {method}"] + + def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: + """Remap and convert the input tensor. + + ``` + x : torch.Tensor + Input tensor + in_place : bool + Whether to process the tensor in place. + in_place is not possible for this preprocessor. + ``` + """ + # Choose correct index based on number of variables + if x.shape[-1] == self.num_training_input_vars: + index = self.index_training_input + indices_remapped = self.index_training_remapped_input + indices_keep = self.indices_keep_training_input + target_number_columns = self.num_remapped_training_input_vars + + elif x.shape[-1] == self.num_inference_input_vars: + index = self.index_inference_input + indices_remapped = self.index_inference_remapped_input + indices_keep = self.indices_keep_inference_input + target_number_columns = self.num_remapped_inference_input_vars + + else: + raise ValueError( + f"Input tensor ({x.shape[-1]}) does not match the training " + f"({self.num_training_input_vars}) or inference shape ({self.num_inference_input_vars})", + ) + + # create new tensor with target number of columns + x_remapped = torch.zeros(x.shape[:-1] + (target_number_columns,), dtype=x.dtype, device=x.device) + if in_place and not self.printed_preprocessor_warning: + LOGGER.warning( + "Remapper (preprocessor) called with in_place=True. This preprocessor cannot be applied in_place as new columns are added to the tensors.", + ) + self.printed_preprocessor_warning = True + + # copy variables that are not remapped + x_remapped[..., : len(indices_keep)] = x[..., indices_keep] + + # Remap variables + for idx_dst, remapper, idx_src in zip(indices_remapped, self.remappers, index): + if idx_src is not None: + for jj, ii in enumerate(idx_dst): + x_remapped[..., ii] = remapper[jj](x[..., idx_src]) + + return x_remapped + + def inverse_transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: + """Convert and remap the output tensor. 
+ + ``` + x : torch.Tensor + Input tensor + in_place : bool + Whether to process the tensor in place. + in_place is not possible for this postprocessor. + ``` + """ + # Choose correct index based on number of variables + if x.shape[-1] == self.num_remapped_training_output_vars: + index = self.index_training_output + indices_remapped = self.index_training_backmapped_output + indices_keep = self.indices_keep_training_output + target_number_columns = self.num_training_output_vars + + elif x.shape[-1] == self.num_remapped_inference_output_vars: + index = self.index_inference_output + indices_remapped = self.index_inference_backmapped_output + indices_keep = self.indices_keep_inference_output + target_number_columns = self.num_inference_output_vars + + else: + raise ValueError( + f"Input tensor ({x.shape[-1]}) does not match the training " + f"({self.num_remapped_training_output_vars}) or inference shape ({self.num_remapped_inference_output_vars})", + ) + + # create new tensor with target number of columns + x_remapped = torch.zeros(x.shape[:-1] + (target_number_columns,), dtype=x.dtype, device=x.device) + if in_place and not self.printed_postprocessor_warning: + LOGGER.warning( + "Remapper (preprocessor) called with in_place=True. This preprocessor cannot be applied in_place as new columns are added to the tensors.", + ) + self.printed_postprocessor_warning = True + + # copy variables that are not remapped + x_remapped[..., indices_keep] = x[..., : len(indices_keep)] + + # Backmap variables + for idx_dst, backmapper, idx_src in zip(index, self.backmappers, indices_remapped): + if idx_dst is not None: + x_remapped[..., idx_dst] = backmapper(x[..., idx_src]) + + return x_remapped + + +class Remapper(BaseRemapperVariable): + """Remap and convert variables. + + cos_sin: + Remap the variable to cosine and sine. + Map output back to degrees. 
+ + ``` + cos_sin: + "mwd" : ["cos_mwd", "sin_mwd"] + ``` + """ + + def __init__( + self, + config=None, + data_indices: Optional[IndexCollection] = None, + statistics: Optional[dict] = None, + ) -> None: + super().__init__(config, data_indices, statistics) + + self.printed_preprocessor_warning, self.printed_postprocessor_warning = False, False + + self._create_remapping_indices(statistics) + + self._validate_indices() diff --git a/tests/data_indices/test_collection.py b/tests/data_indices/test_collection.py index 5558c91..78b3ecd 100644 --- a/tests/data_indices/test_collection.py +++ b/tests/data_indices/test_collection.py @@ -17,50 +17,102 @@ def data_indices(): config = DictConfig( { "data": { - "forcing": ["x"], + "forcing": ["x", "e"], "diagnostic": ["z", "q"], + "remapped": [ + { + "e": ["e_1", "e_2"], + "d": ["d_1", "d_2"], + } + ], }, }, ) - name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} + name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "e": 4, "d": 5, "other": 6} return IndexCollection(config=config, name_to_index=name_to_index) def test_dataindices_init(data_indices) -> None: - assert data_indices.data.input.includes == ["x"] + assert data_indices.data.input.includes == ["x", "e"] assert data_indices.data.input.excludes == ["z", "q"] + assert data_indices.internal_data.input.includes == ["x", "e_1", "e_2"] + assert data_indices.internal_data.input.excludes == ["z", "q"] + assert data_indices.internal_data.output.includes == ["z", "q"] + assert data_indices.internal_data.output.excludes == ["x", "e_1", "e_2"] assert data_indices.data.output.includes == ["z", "q"] - assert data_indices.data.output.excludes == ["x"] - assert data_indices.model.input.includes == ["x"] + assert data_indices.data.output.excludes == ["x", "e"] + assert data_indices.model.input.includes == ["x", "e"] assert data_indices.model.input.excludes == [] + assert data_indices.internal_model.input.includes == ["x", "e_1", "e_2"] + assert data_indices.internal_model.input.excludes == [] + assert data_indices.internal_model.output.includes == ["z", "q"] + assert data_indices.internal_model.output.excludes == [] assert data_indices.model.output.includes == ["z", "q"] assert data_indices.model.output.excludes == [] - assert data_indices.data.input.name_to_index == {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} - assert data_indices.data.output.name_to_index == {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} - assert data_indices.model.input.name_to_index == {"x": 0, "y": 1, "other": 2} - assert data_indices.model.output.name_to_index == {"y": 0, "z": 1, "q": 2, "other": 3} + assert data_indices.data.input.name_to_index == {"x": 0, "y": 1, "z": 2, "q": 3, "e": 4, "d": 5, "other": 6} + assert data_indices.internal_data.input.name_to_index == { + "x": 0, + "y": 1, + "z": 2, + "q": 3, + "other": 4, + "e_1": 5, + "e_2": 6, + "d_1": 7, + "d_2": 8, + } + assert data_indices.internal_data.output.name_to_index == { + "x": 0, + "y": 1, + "z": 2, + "q": 3, + "other": 4, + "e_1": 5, + "e_2": 6, + "d_1": 7, + "d_2": 8, + } + assert data_indices.data.output.name_to_index == {"x": 0, "y": 1, "z": 2, "q": 3, "e": 4, "d": 5, "other": 6} + assert data_indices.model.input.name_to_index == {"x": 0, "y": 1, "e": 2, "d": 3, "other": 4} + assert data_indices.internal_model.input.name_to_index == { + "x": 0, + "y": 1, + "other": 2, + "e_1": 3, + "e_2": 4, + "d_1": 5, + "d_2": 6, + } + assert data_indices.internal_model.output.name_to_index == {"y": 0, "z": 1, "q": 2, "other": 3, "d_1": 4, "d_2": 5} + assert 
data_indices.model.output.name_to_index == {"y": 0, "z": 1, "q": 2, "d": 3, "other": 4} def test_dataindices_max(data_indices) -> None: assert max(data_indices.data.input.full) == max(data_indices.data.input.name_to_index.values()) + assert max(data_indices.internal_data.input.full) == max(data_indices.internal_data.input.name_to_index.values()) + assert max(data_indices.internal_data.output.full) == max(data_indices.internal_data.output.name_to_index.values()) assert max(data_indices.data.output.full) == max(data_indices.data.output.name_to_index.values()) assert max(data_indices.model.input.full) == max(data_indices.model.input.name_to_index.values()) + assert max(data_indices.internal_model.input.full) == max(data_indices.internal_model.input.name_to_index.values()) + assert max(data_indices.internal_model.output.full) == max( + data_indices.internal_model.output.name_to_index.values() + ) assert max(data_indices.model.output.full) == max(data_indices.model.output.name_to_index.values()) def test_dataindices_todict(data_indices) -> None: expected_output = { "input": { - "full": torch.Tensor([0, 1, 4]).to(torch.int), - "forcing": torch.Tensor([0]).to(torch.int), + "full": torch.Tensor([0, 1, 4, 5, 6]).to(torch.int), + "forcing": torch.Tensor([0, 4]).to(torch.int), "diagnostic": torch.Tensor([2, 3]).to(torch.int), - "prognostic": torch.Tensor([1, 4]).to(torch.int), + "prognostic": torch.Tensor([1, 5, 6]).to(torch.int), }, "output": { - "full": torch.Tensor([1, 2, 3, 4]).to(torch.int), - "forcing": torch.Tensor([0]).to(torch.int), + "full": torch.Tensor([1, 2, 3, 5, 6]).to(torch.int), + "forcing": torch.Tensor([0, 4]).to(torch.int), "diagnostic": torch.Tensor([2, 3]).to(torch.int), - "prognostic": torch.Tensor([1, 4]).to(torch.int), + "prognostic": torch.Tensor([1, 5, 6]).to(torch.int), }, } @@ -70,19 +122,41 @@ def test_dataindices_todict(data_indices) -> None: assert torch.allclose(value, expected_output[key][subkey]) +def test_internaldataindices_todict(data_indices) -> None: + expected_output = { + "input": { + "full": torch.Tensor([0, 1, 4, 5, 6, 7, 8]).to(torch.int), + "forcing": torch.Tensor([0, 5, 6]).to(torch.int), + "diagnostic": torch.Tensor([2, 3]).to(torch.int), + "prognostic": torch.Tensor([1, 4, 7, 8]).to(torch.int), + }, + "output": { + "full": torch.Tensor([1, 2, 3, 4, 7, 8]).to(torch.int), + "forcing": torch.Tensor([0, 5, 6]).to(torch.int), + "diagnostic": torch.Tensor([2, 3]).to(torch.int), + "prognostic": torch.Tensor([1, 4, 7, 8]).to(torch.int), + }, + } + + for key in ["output", "input"]: + for subkey, value in data_indices.internal_data.todict()[key].items(): + assert subkey in expected_output[key] + assert torch.allclose(value, expected_output[key][subkey]) + + def test_modelindices_todict(data_indices) -> None: expected_output = { "input": { - "full": torch.Tensor([0, 1, 2]).to(torch.int), - "forcing": torch.Tensor([0]).to(torch.int), + "full": torch.Tensor([0, 1, 2, 3, 4]).to(torch.int), + "forcing": torch.Tensor([0, 2]).to(torch.int), "diagnostic": torch.Tensor([]).to(torch.int), - "prognostic": torch.Tensor([1, 2]).to(torch.int), + "prognostic": torch.Tensor([1, 3, 4]).to(torch.int), }, "output": { - "full": torch.Tensor([0, 1, 2, 3]).to(torch.int), + "full": torch.Tensor([0, 1, 2, 3, 4]).to(torch.int), "forcing": torch.Tensor([]).to(torch.int), "diagnostic": torch.Tensor([1, 2]).to(torch.int), - "prognostic": torch.Tensor([0, 3]).to(torch.int), + "prognostic": torch.Tensor([0, 3, 4]).to(torch.int), }, } @@ -90,3 +164,25 @@ def 
test_modelindices_todict(data_indices) -> None: for subkey, value in data_indices.model.todict()[key].items(): assert subkey in expected_output[key] assert torch.allclose(value, expected_output[key][subkey]) + + +def test_internalmodelindices_todict(data_indices) -> None: + expected_output = { + "input": { + "full": torch.Tensor([0, 1, 2, 3, 4, 5, 6]).to(torch.int), + "forcing": torch.Tensor([0, 3, 4]).to(torch.int), + "diagnostic": torch.Tensor([]).to(torch.int), + "prognostic": torch.Tensor([1, 2, 5, 6]).to(torch.int), + }, + "output": { + "full": torch.Tensor([0, 1, 2, 3, 4, 5]).to(torch.int), + "forcing": torch.Tensor([]).to(torch.int), + "diagnostic": torch.Tensor([1, 2]).to(torch.int), + "prognostic": torch.Tensor([0, 3, 4, 5]).to(torch.int), + }, + } + + for key in ["output", "input"]: + for subkey, value in data_indices.internal_model.todict()[key].items(): + assert subkey in expected_output[key] + assert torch.allclose(value, expected_output[key][subkey]) diff --git a/tests/preprocessing/test_preprocessor_imputer.py b/tests/preprocessing/test_preprocessor_imputer.py index ea04b9a..e161c12 100644 --- a/tests/preprocessing/test_preprocessor_imputer.py +++ b/tests/preprocessing/test_preprocessor_imputer.py @@ -26,6 +26,7 @@ def non_default_input_imputer(): "imputer": {"default": "none", "mean": ["y"], "maximum": ["x"], "none": ["z"], "minimum": ["q"]}, "forcing": ["z", "q"], "diagnostic": ["other"], + "remapped": [], }, }, ) @@ -37,7 +38,7 @@ def non_default_input_imputer(): } name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} data_indices = IndexCollection(config=config, name_to_index=name_to_index) - return InputImputer(config=config.data.imputer, statistics=statistics, data_indices=data_indices) + return InputImputer(config=config.data.imputer, data_indices=data_indices, statistics=statistics) @pytest.fixture() @@ -49,6 +50,7 @@ def default_input_imputer(): "imputer": {"default": "minimum"}, "forcing": ["z", "q"], "diagnostic": ["other"], + "remapped": [], }, }, ) @@ -86,6 +88,7 @@ def default_constant_imputer(): "imputer": {"default": "none", 0: ["x"], 3.0: ["y"], 22.7: ["z"], 10: ["q"]}, "forcing": ["z", "q"], "diagnostic": ["other"], + "remapped": [], }, }, ) @@ -103,6 +106,7 @@ def non_default_constant_imputer(): "imputer": {"default": 22.7}, "forcing": ["z", "q"], "diagnostic": ["other"], + "remapped": [], }, }, ) diff --git a/tests/preprocessing/test_preprocessor_normalizer.py b/tests/preprocessing/test_preprocessor_normalizer.py index 787079d..9432b83 100644 --- a/tests/preprocessing/test_preprocessor_normalizer.py +++ b/tests/preprocessing/test_preprocessor_normalizer.py @@ -25,6 +25,7 @@ def input_normalizer(): "normalizer": {"default": "mean-std", "min-max": ["x"], "max": ["y"], "none": ["z"], "mean-std": ["q"]}, "forcing": ["z", "q"], "diagnostic": ["other"], + "remapped": [], }, }, ) @@ -36,7 +37,7 @@ def input_normalizer(): } name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} data_indices = IndexCollection(config=config, name_to_index=name_to_index) - return InputNormalizer(config=config.data.normalizer, statistics=statistics, data_indices=data_indices) + return InputNormalizer(config=config.data.normalizer, data_indices=data_indices, statistics=statistics) def test_normalizer_not_inplace(input_normalizer) -> None: diff --git a/tests/preprocessing/test_preprocessor_remapper.py b/tests/preprocessing/test_preprocessor_remapper.py new file mode 100644 index 0000000..d5bd52a --- /dev/null +++ b/tests/preprocessing/test_preprocessor_remapper.py @@ -0,0 
+1,69 @@ +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import pytest +import torch +from omegaconf import DictConfig + +from anemoi.models.data_indices.collection import IndexCollection +from anemoi.models.preprocessing.remapper import Remapper + + +@pytest.fixture() +def input_remapper(): + config = DictConfig( + { + "diagnostics": {"log": {"code": {"level": "DEBUG"}}}, + "data": { + "remapper": { + "cos_sin": { + "d": ["cos_d", "sin_d"], + } + }, + "forcing": ["z", "q"], + "diagnostic": ["other"], + "remapped": [ + { + "d": ["cos_d", "sin_d"], + } + ], + }, + }, + ) + statistics = {} + name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "d": 4, "other": 5} + data_indices = IndexCollection(config=config, name_to_index=name_to_index) + return Remapper(config=config.data.remapper, data_indices=data_indices, statistics=statistics) + + +def test_remap_not_inplace(input_remapper) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]]) + input_remapper(x, in_place=False) + assert torch.allclose(x, torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]])) + + +def test_remap(input_remapper) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]]) + expected_output = torch.Tensor( + [[1.0, 2.0, 3.0, 4.0, 5.0, -0.8660254, 0.5], [6.0, 7.0, 8.0, 9.0, 10.0, -0.93358043, -0.35836795]] + ) + assert torch.allclose(input_remapper.transform(x), expected_output) + + +def test_inverse_transform(input_remapper) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 5.0, -0.8660254, 0.5], [6.0, 7.0, 8.0, 9.0, 10.0, -0.93358043, -0.35836795]]) + expected_output = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]]) + assert torch.allclose(input_remapper.inverse_transform(x), expected_output) + + +def test_remap_inverse_transform(input_remapper) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]]) + assert torch.allclose( + input_remapper.inverse_transform(input_remapper.transform(x, in_place=False), in_place=False), x + ) From 79fd11a7e0cb1e0906f51f737637ad23f16d2be0 Mon Sep 17 00:00:00 2001 From: "S. Hahner" <44293258+sahahner@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:05:57 +0200 Subject: [PATCH 07/14] fix: data indices import. typo documentation (#34) * fix: data indices import. typo documentation * fix: data indices import. typo documentation * typo * tests: remapped variables need to be given as dictionary not list --- CHANGELOG.md | 3 +-- docs/modules/data_indices.rst | 2 +- src/anemoi/models/preprocessing/__init__.py | 4 +--- tests/data_indices/test_collection.py | 10 ++++------ tests/preprocessing/test_preprocessor_imputer.py | 2 +- tests/preprocessing/test_preprocessor_normalizer.py | 2 +- tests/preprocessing/test_preprocessor_remapper.py | 8 +++----- 7 files changed, 12 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9da050f..48c52c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,7 @@ Keep it human-readable, your future self will thank you! 
### Added - CI workflow to update the changelog on release - -- Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. + - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. ### Changed diff --git a/docs/modules/data_indices.rst b/docs/modules/data_indices.rst index d28fa04..c546795 100644 --- a/docs/modules/data_indices.rst +++ b/docs/modules/data_indices.rst @@ -53,7 +53,7 @@ remapper-preprocessor. data: remapped: - - d: + d: - "d_1" - "d_2" diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py index 1368046..53017fb 100644 --- a/src/anemoi/models/preprocessing/__init__.py +++ b/src/anemoi/models/preprocessing/__init__.py @@ -8,15 +8,13 @@ # import logging -from typing import TYPE_CHECKING from typing import Optional import torch from torch import Tensor from torch import nn -if TYPE_CHECKING: - from anemoi.models.data_indices.collection import IndexCollection +from anemoi.models.data_indices.collection import IndexCollection LOGGER = logging.getLogger(__name__) diff --git a/tests/data_indices/test_collection.py b/tests/data_indices/test_collection.py index 78b3ecd..8505f8a 100644 --- a/tests/data_indices/test_collection.py +++ b/tests/data_indices/test_collection.py @@ -19,12 +19,10 @@ def data_indices(): "data": { "forcing": ["x", "e"], "diagnostic": ["z", "q"], - "remapped": [ - { - "e": ["e_1", "e_2"], - "d": ["d_1", "d_2"], - } - ], + "remapped": { + "e": ["e_1", "e_2"], + "d": ["d_1", "d_2"], + }, }, }, ) diff --git a/tests/preprocessing/test_preprocessor_imputer.py b/tests/preprocessing/test_preprocessor_imputer.py index e161c12..5218efc 100644 --- a/tests/preprocessing/test_preprocessor_imputer.py +++ b/tests/preprocessing/test_preprocessor_imputer.py @@ -26,7 +26,7 @@ def non_default_input_imputer(): "imputer": {"default": "none", "mean": ["y"], "maximum": ["x"], "none": ["z"], "minimum": ["q"]}, "forcing": ["z", "q"], "diagnostic": ["other"], - "remapped": [], + "remapped": {}, }, }, ) diff --git a/tests/preprocessing/test_preprocessor_normalizer.py b/tests/preprocessing/test_preprocessor_normalizer.py index 9432b83..cc527e7 100644 --- a/tests/preprocessing/test_preprocessor_normalizer.py +++ b/tests/preprocessing/test_preprocessor_normalizer.py @@ -25,7 +25,7 @@ def input_normalizer(): "normalizer": {"default": "mean-std", "min-max": ["x"], "max": ["y"], "none": ["z"], "mean-std": ["q"]}, "forcing": ["z", "q"], "diagnostic": ["other"], - "remapped": [], + "remapped": {}, }, }, ) diff --git a/tests/preprocessing/test_preprocessor_remapper.py b/tests/preprocessing/test_preprocessor_remapper.py index d5bd52a..86bdfde 100644 --- a/tests/preprocessing/test_preprocessor_remapper.py +++ b/tests/preprocessing/test_preprocessor_remapper.py @@ -28,11 +28,9 @@ def input_remapper(): }, "forcing": ["z", "q"], "diagnostic": ["other"], - "remapped": [ - { - "d": ["cos_d", "sin_d"], - } - ], + "remapped": { + "d": ["cos_d", "sin_d"], + }, }, }, ) From b4d72f1d10117de0b050c7a43c8665c53dee289e Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Wed, 11 Sep 2024 17:14:05 +0200 Subject: [PATCH 08/14] Fix: make remapper config object optional (#36) * fix: make config object optional * chore: add .envrc to gitignore * docs: changelog --- .gitignore | 1 + CHANGELOG.md | 2 +- src/anemoi/models/data_indices/collection.py | 6 ++++-- 3 files changed, 
6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index d8baf06..d92403b 100644 --- a/.gitignore +++ b/.gitignore @@ -121,6 +121,7 @@ celerybeat.pid # Environments .env +.envrc .venv env/ venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 48c52c0..5433ccc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ Keep it human-readable, your future self will thank you! ### Added - CI workflow to update the changelog on release - - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. + - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. ### Changed diff --git a/src/anemoi/models/data_indices/collection.py b/src/anemoi/models/data_indices/collection.py index 0d1031b..266c11a 100644 --- a/src/anemoi/models/data_indices/collection.py +++ b/src/anemoi/models/data_indices/collection.py @@ -30,9 +30,11 @@ def __init__(self, config, name_to_index) -> None: self.diagnostic = ( [] if config.data.diagnostic is None else OmegaConf.to_container(config.data.diagnostic, resolve=True) ) - # config.data.remapped is a list of diccionaries: every remapper is one entry of the list + # config.data.remapped is an optional dictionary with every remapper as one entry self.remapped = ( - dict() if config.data.remapped is None else OmegaConf.to_container(config.data.remapped, resolve=True) + dict() + if config.data.get("remapped") is None + else OmegaConf.to_container(config.data.remapped, resolve=True) ) self.forcing_remapped = self.forcing.copy() From dbee83b3b9b48a920f0f6b23743d186213fd0b31 Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Wed, 11 Sep 2024 17:24:04 +0200 Subject: [PATCH 09/14] Docs/changelog update 0.3.0 (#40) * [create-pull-request] automated change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update CHANGELOG.md --------- Co-authored-by: JesperDramsch <2620316+JesperDramsch@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5433ccc..55fba06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,21 +8,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Please add your functional changes to the appropriate section in the PR. Keep it human-readable, your future self will thank you! -## [Unreleased] +## [Unreleased](https://github.com/ecmwf/anemoi-models/compare/0.3.0...HEAD) + +## [0.3.0](https://github.com/ecmwf/anemoi-models/compare/0.2.1...0.3.0) - Remapping of (meteorological) Variables ### Added - - CI workflow to update the changelog on release - - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. + +- CI workflow to update the changelog on release +- Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. 
### Changed - - Update CI to inherit from common infrastructue reusable workflows - - run downstream-ci only when src and tests folders have changed - - New error messages for wrongs graphs. +- Update CI to inherit from common infrastructue reusable workflows +- run downstream-ci only when src and tests folders have changed +- New error messages for wrongs graphs. ### Removed -## [0.2.1] - Dependency update +## [0.2.1](https://github.com/ecmwf/anemoi-models/compare/0.2.0...0.2.1) - Dependency update ### Added @@ -33,7 +36,7 @@ Keep it human-readable, your future self will thank you! - anemoi-datasets dependency -## [0.2.0] - Support Heterodata +## [0.2.0](https://github.com/ecmwf/anemoi-models/compare/0.1.0...0.2.0) - Support Heterodata ### Added @@ -43,15 +46,12 @@ Keep it human-readable, your future self will thank you! - Updated to support new PyTorch Geometric HeteroData structure (defined by `anemoi-graphs` package). -## [0.1.0] - Initial Release +## [0.1.0](https://github.com/ecmwf/anemoi-models/releases/tag/0.1.0) - Initial Release ### Added + - Documentation - Initial code release with models, layers, distributed, preprocessing, and data_indices - Added Changelog -[unreleased]: https://github.com/ecmwf/anemoi-models/compare/0.2.1...HEAD -[0.2.1]: https://github.com/ecmwf/anemoi-models/compare/0.2.0...0.2.1 -[0.2.0]: https://github.com/ecmwf/anemoi-models/compare/0.1.0...0.2.0 -[0.1.0]: https://github.com/ecmwf/anemoi-models/releases/tag/0.1.0 From 43846f042a44699772b4f6d9830bd3f85b2ffd4c Mon Sep 17 00:00:00 2001 From: Helen Theissen Date: Wed, 18 Sep 2024 11:14:40 +0100 Subject: [PATCH 10/14] Chore/multiple fixes ci precommit (#41) * fix: change pre-cmmit autoupdate schedule to monthly * fix: change the merge strategy for Changelog to Union * fix: add .envrc to .gitignore * ci: ignore pre-commit-config and readthedocs for changelog updates * ci: fix to correct hpc workflow call * fix: update precommit config * chore: update pre-commits * feat: add codeowners file * chore: update dependencies * ci: add hpc-config * docs: changelog * fix: respond to review comments --------- Co-authored-by: Jesper Dramsch --- .gitattributes | 1 + .github/CODEOWNERS | 6 ++++ .github/ci-hpc-config.yml | 7 +++++ .github/workflows/changelog-pr-update.yml | 3 ++ .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 31 +++++++++++++++------ CHANGELOG.md | 5 ++++ pyproject.toml | 34 ++++------------------- 8 files changed, 52 insertions(+), 37 deletions(-) create mode 100644 .gitattributes create mode 100644 .github/CODEOWNERS create mode 100644 .github/ci-hpc-config.yml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a19ade0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +CHANGELOG.md merge=union diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..a2c619f --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,6 @@ +# CODEOWNERS file + +# Protect workflow files +/.github/ @theissenhelen @jesperdramsch @gmertes +/.pre-commit-config.yaml @theissenhelen @jesperdramsch @gmertes +/pyproject.toml @theissenhelen @jesperdramsch @gmertes diff --git a/.github/ci-hpc-config.yml b/.github/ci-hpc-config.yml new file mode 100644 index 0000000..5c27b03 --- /dev/null +++ b/.github/ci-hpc-config.yml @@ -0,0 +1,7 @@ +build: + python: '3.10' + modules: + - ninja + python_dependencies: + - ecmwf/anemoi-utils@develop + parallel: 64 diff --git a/.github/workflows/changelog-pr-update.yml b/.github/workflows/changelog-pr-update.yml index 43acb1c..e7ed9a2 100644 
--- a/.github/workflows/changelog-pr-update.yml +++ b/.github/workflows/changelog-pr-update.yml @@ -2,6 +2,9 @@ name: Check Changelog Update on PR on: pull_request: types: [assigned, opened, synchronize, reopened, labeled, unlabeled] + paths-ignore: + - .pre-commit-config.yaml + - .readthedocs.yaml jobs: Check-Changelog: name: Check Changelog Action diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5867ee0..8b2926b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,7 +37,7 @@ jobs: downstream-ci-hpc: name: downstream-ci-hpc if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} - uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci.yml@main + uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main with: anemoi-models: ecmwf/anemoi-models@${{ github.event.pull_request.head.sha || github.sha }} secrets: inherit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c042b1f..f3c3962 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,12 @@ repos: - id: no-commit-to-branch # Prevent committing to main / master - id: check-added-large-files # Check for large files added to git - id: check-merge-conflict # Check for files that contain merge conflict +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 # Use the ref you want to point at + hooks: + - id: python-use-type-annotations # Check for missing type annotations + - id: python-check-blanket-noqa # Check for # noqa: all + - id: python-no-log-warn # Check for log.warn - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.8.0 hooks: - id: black args: [--line-length=120] @@ -34,7 +40,7 @@ repos: - --force-single-line-imports - --profile black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.3 + rev: v0.6.4 hooks: - id: ruff # Next line is for documentation code snippets @@ -45,7 +51,7 @@ - --exit-non-zero-on-fix - --preview - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.9.1 + rev: v1.0.0 hooks: - id: sphinx-lint # For now, we use it. But it does not support a lot of sphinx features @@ -59,12 +65,21 @@ hooks: - id: docconvert args: ["numpy"] -- repo: https://github.com/b8raoult/optional-dependencies-all - rev: "0.0.6" - hooks: - - id: optional-dependencies-all - args: ["--inplace", "--exclude-keys=dev,docs,tests", "--group=dev=all,docs,tests"] - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.2.1" + rev: "2.2.3" hooks: - id: pyproject-fmt +- repo: https://github.com/jshwi/docsig # Check docstrings against function sig + rev: v0.60.1 + hooks: + - id: docsig + args: + - --ignore-no-params # Allow docstrings without parameters + - --check-dunders # Check dunder methods + - --check-overridden # Check overridden methods + - --check-protected # Check protected methods + - --check-class # Check class docstrings + - --disable=E113 # Disable empty docstrings + - --summary # Print a summary +ci: + autoupdate_schedule: monthly diff --git a/CHANGELOG.md b/CHANGELOG.md index 55fba06..e6553c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,12 +16,17 @@ Keep it human-readable, your future self will thank you! - CI workflow to update the changelog on release - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords.
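A minimal sketch of a function that should satisfy the docsig and docconvert hooks configured above: docsig compares the documented parameters against the signature, and docconvert enforces the numpy docstring style. The function itself is hypothetical and not part of the repository:

    def scale(values: list, factor: float) -> list:
        """Scale each value by a constant factor.

        Parameters
        ----------
        values : list
            Input values to scale.
        factor : float
            Multiplier applied to every element.

        Returns
        -------
        list
            The scaled values.
        """
        return [v * factor for v in values]
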
+- Codeowners file +- Pygrep precommit hooks +- Docsig precommit hooks +- Changelog merge strategy ### Changed - Update CI to inherit from common infrastructure reusable workflows - run downstream-ci only when src and tests folders have changed - New error messages for wrong graphs. +- Bugfixes for CI ### Removed diff --git a/pyproject.toml b/pyproject.toml index 05a99c4..214f82c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,20 +11,13 @@ [build-system] build-backend = "setuptools.build_meta" -requires = [ - "setuptools>=61", - "setuptools-scm>=8", -] +requires = [ "setuptools>=61", "setuptools-scm>=8" ] [project] name = "anemoi-models" description = "A package to hold various functions to support training of ML models." readme = "README.md" -keywords = [ - "ai", - "models", - "tools", -] +keywords = [ "ai", "models", "tools" ] license = { file = "LICENSE" } authors = [ @@ -47,9 +40,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] -dynamic = [ - "version", -] +dynamic = [ "version" ] dependencies = [ "anemoi-utils>=0.1.9", "einops>=0.6.1", @@ -57,19 +48,9 @@ dependencies = [ "torch>=2.2", "torch-geometric>=2.3,<2.5", ] -optional-dependencies.all = [ -] +optional-dependencies.all = [ ] -optional-dependencies.dev = [ - "hypothesis", - "nbsphinx", - "pandoc", - "pytest", - "rstfmt", - "sphinx", - "sphinx-argparse<0.5", - "sphinx-rtd-theme", -] +optional-dependencies.dev = [ "anemoi-models[all,docs,tests]" ] optional-dependencies.docs = [ "nbsphinx", @@ -80,10 +61,7 @@ optional-dependencies.docs = [ "sphinx-rtd-theme", ] -optional-dependencies.tests = [ - "hypothesis", - "pytest", -] +optional-dependencies.tests = [ "hypothesis", "pytest" ] urls.Documentation = "https://anemoi-models.readthedocs.io/" urls.Homepage = "https://github.com/ecmwf/anemoi-models/" From 02192661aede10baef6b03682058c01378bdac29 Mon Sep 17 00:00:00 2001 From: Helen Theissen Date: Wed, 18 Sep 2024 18:37:06 +0100 Subject: [PATCH 11/14] 11 add configurability to dropout in multiheadselfattention module (#12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add configurability to dropout in MultiHeadSelfAttention Co-authored-by: Rilwan (Akanni) Adewoyin <18564167+Rilwan-Adewoyin@users.noreply.github.com> * test: adjust to dropout_p * doc: update changelog * Feature/integrate reusable workflows (#16) * ci: add public pr label * ci: add readthedocs update check * ci: add downstream ci * ci: add ci-config * chore(deps): remove unused dependency * docs: update changelog * ci: switch to main * chore: changelog 0.2.1 * Update error messages from invalid sub_graph in model instantiation (#20) * ci: inherit pypi publish flow (#17) * ci: inherit pypi publish flow Co-authored-by: Helen Theissen * docs: add to changelog * fix: typo in reusable workflow * fix: another typo * chore: bump actions/setup-python to v5 * ci: run downstream-ci for changes in src and tests * docs: update changelog --------- Co-authored-by: Helen Theissen * Update CHANGELOG.md to KeepChangelog format * [pre-commit.ci] pre-commit autoupdate (#25) updates: - [github.com/psf/black-pre-commit-mirror: 24.4.2 → 24.8.0](https://github.com/psf/black-pre-commit-mirror/compare/24.4.2...24.8.0) - [github.com/astral-sh/ruff-pre-commit: v0.4.6 → v0.6.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.6...v0.6.2) - [github.com/tox-dev/pyproject-fmt: 2.1.3 → 2.2.1](https://github.com/tox-dev/pyproject-fmt/compare/2.1.3...2.2.1) Co-authored-by: pre-commit-ci[bot]
<66853113+pre-commit-ci[bot]@users.noreply.github.com> * Ci/changelog-release-updater (#26) * ci: add changelog release updater * docs: update changelog * Feature/integrate reusable workflows (#16) * ci: add public pr label * ci: add readthedocs update check * ci: add downstream ci * ci: add ci-config * chore(deps): remove unused dependency * docs: update changelog * ci: switch to main * chore: changelog 0.2.1 * Update error messages from invalid sub_graph in model instantiation (#20) * ci: inherit pypi publish flow (#17) * ci: inherit pypi publish flow Co-authored-by: Helen Theissen * docs: add to changelog * fix: typo in reusable workflow * fix: another typo * chore: bump actions/setup-python to v5 * ci: run downstream-ci for changes in src and tests * docs: update changelog --------- Co-authored-by: Helen Theissen * Update CHANGELOG.md to KeepChangelog format * Ci/changelog-release-updater (#26) * ci: add changelog release updater * docs: update changelog --------- Co-authored-by: Rilwan (Akanni) Adewoyin <18564167+Rilwan-Adewoyin@users.noreply.github.com> Co-authored-by: Gert Mertes Co-authored-by: Mario Santa Cruz <48736305+JPXKQX@users.noreply.github.com> Co-authored-by: Jesper Dramsch Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 +++ src/anemoi/models/layers/attention.py | 15 +++++++++++---- src/anemoi/models/layers/block.py | 12 ++++++++++-- src/anemoi/models/layers/chunk.py | 4 ++++ src/anemoi/models/layers/processor.py | 4 ++++ tests/layers/block/test_block_transformer.py | 13 ++++++++++--- tests/layers/chunk/test_chunk_transformer.py | 4 ++++ .../processor/test_transformer_processor.py | 6 ++++++ tests/layers/test_attention.py | 16 ++++++++++------ 9 files changed, 62 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6553c7..ec4db72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ Keep it human-readable, your future self will thank you! ### Added +- CI workflow to update the changelog on release +- configurability of the dropout probability in the MultiHeadSelfAttention module - CI workflow to update the changelog on release - Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. - Codeowners file @@ -21,6 +23,7 @@
- Docsig precommit hooks - Changelog merge strategy + ### Changed - Update CI to inherit from common infrastructure reusable workflows diff --git a/src/anemoi/models/layers/attention.py b/src/anemoi/models/layers/attention.py index 2063ad0..931e098 100644 --- a/src/anemoi/models/layers/attention.py +++ b/src/anemoi/models/layers/attention.py @@ -40,7 +40,7 @@ def __init__( bias: bool = False, is_causal: bool = False, window_size: Optional[int] = None, - dropout: float = 0.0, + dropout_p: float = 0.0, ): super().__init__() @@ -48,11 +48,11 @@ def __init__( embed_dim % num_heads == 0 ), f"Embedding dimension ({embed_dim}) must be divisible by number of heads ({num_heads})" - self.dropout = dropout self.num_heads = num_heads self.embed_dim = embed_dim self.head_dim = embed_dim // num_heads # q k v self.window_size = (window_size, window_size) # flash attention + self.dropout_p = dropout_p self.is_causal = is_causal self.lin_qkv = nn.Linear(embed_dim, 3 * embed_dim, bias=bias) @@ -86,15 +86,22 @@ def forward( query = shard_heads(query, shapes=shapes, mgroup=model_comm_group) key = shard_heads(key, shapes=shapes, mgroup=model_comm_group) value = shard_heads(value, shapes=shapes, mgroup=model_comm_group) + dropout_p = self.dropout_p if self.training else 0.0 if _FLASH_ATTENTION_AVAILABLE: query, key, value = ( einops.rearrange(t, "batch heads grid vars -> batch grid heads vars") for t in (query, key, value) ) - out = self.attention(query, key, value, causal=False, window_size=self.window_size) + out = self.attention(query, key, value, causal=False, window_size=self.window_size, dropout_p=dropout_p) out = einops.rearrange(out, "batch grid heads vars -> batch heads grid vars") else: - out = self.attention(query, key, value, is_causal=False) # expects (batch heads grid variable) format + out = self.attention( + query, + key, + value, + is_causal=False, + dropout_p=dropout_p, + ) # expects (batch heads grid variable) format out = shard_sequence(out, shapes=shapes, mgroup=model_comm_group) out = einops.rearrange(out, "batch heads grid vars -> (batch grid) (heads vars)") diff --git a/src/anemoi/models/layers/block.py b/src/anemoi/models/layers/block.py index ba29607..7fd3627 100644 --- a/src/anemoi/models/layers/block.py +++ b/src/anemoi/models/layers/block.py @@ -55,7 +55,15 @@ def forward( class TransformerProcessorBlock(BaseBlock): """Transformer block with MultiHeadSelfAttention and MLPs.""" - def __init__(self, num_channels, hidden_dim, num_heads, activation, window_size: int): + def __init__( + self, + num_channels: int, + hidden_dim: int, + num_heads: int, + activation: str, + window_size: int, + dropout_p: float = 0.0, + ): super().__init__() try: @@ -72,7 +80,7 @@ def __init__(self, num_channels, hidden_dim, num_heads, activation, window_size: window_size=window_size, bias=False, is_causal=False, - dropout=0.0, + dropout_p=dropout_p, ) self.mlp = nn.Sequential( diff --git a/src/anemoi/models/layers/chunk.py b/src/anemoi/models/layers/chunk.py index 61dec34..87d0ac7 100644 --- a/src/anemoi/models/layers/chunk.py +++ b/src/anemoi/models/layers/chunk.py @@ -73,6 +73,7 @@ def __init__( num_heads: int = 16, mlp_hidden_ratio: int = 4, activation: str = "GELU", + dropout_p: float = 0.0, ) -> None: """Initialize TransformerProcessor.
@@ -88,6 +89,8 @@ def __init__( ratio of mlp hidden dimension to embedding dimension, default 4 activation : str, optional Activation function, by default "GELU" + dropout_p: float + Dropout probability used for multi-head self attention, default 0.0 """ super().__init__(num_channels=num_channels, num_layers=num_layers) @@ -98,6 +101,7 @@ def __init__( num_heads=num_heads, activation=activation, window_size=window_size, + dropout_p=dropout_p, ) def forward( diff --git a/src/anemoi/models/layers/processor.py b/src/anemoi/models/layers/processor.py index bb33609..6ba8eb1 100644 --- a/src/anemoi/models/layers/processor.py +++ b/src/anemoi/models/layers/processor.py @@ -95,6 +95,7 @@ def __init__( cpu_offload: bool = False, num_heads: int = 16, mlp_hidden_ratio: int = 4, + dropout_p: float = 0.1, **kwargs, ) -> None: """Initialize TransformerProcessor. @@ -113,6 +114,8 @@ def __init__( ratio of mlp hidden dimension to embedding dimension, default 4 activation : str, optional Activation function, by default "GELU" + dropout_p: float, optional + Dropout probability used for multi-head self attention, default 0.1 """ super().__init__( num_channels=num_channels, @@ -133,6 +136,7 @@ def __init__( num_layers=self.chunk_size, window_size=window_size, activation=activation, + dropout_p=dropout_p, ) self.offload_layers(cpu_offload) diff --git a/tests/layers/block/test_block_transformer.py b/tests/layers/block/test_block_transformer.py index 97c274f..2e63386 100644 --- a/tests/layers/block/test_block_transformer.py +++ b/tests/layers/block/test_block_transformer.py @@ -29,11 +29,14 @@ class TestTransformerProcessorBlock: num_heads=st.integers(min_value=1, max_value=10), activation=st.sampled_from(["ReLU", "GELU", "Tanh"]), window_size=st.integers(min_value=1, max_value=512), + dropout_p=st.floats(min_value=0.0, max_value=1.0), ) @settings(max_examples=10) - def test_init(self, factor_attention_heads, hidden_dim, num_heads, activation, window_size): + def test_init(self, factor_attention_heads, hidden_dim, num_heads, activation, window_size, dropout_p): num_channels = num_heads * factor_attention_heads - block = TransformerProcessorBlock(num_channels, hidden_dim, num_heads, activation, window_size) + block = TransformerProcessorBlock( + num_channels, hidden_dim, num_heads, activation, window_size, dropout_p=dropout_p + ) assert isinstance(block, TransformerProcessorBlock) assert isinstance(block.layer_norm1, nn.LayerNorm) @@ -49,6 +52,7 @@ def test_init(self, factor_attention_heads, hidden_dim, num_heads, activation, w window_size=st.integers(min_value=1, max_value=512), shapes=st.lists(st.integers(min_value=1, max_value=10), min_size=3, max_size=3), batch_size=st.integers(min_value=1, max_value=40), + dropout_p=st.floats(min_value=0.0, max_value=1.0), ) @settings(max_examples=10) def test_forward_output( self, factor_attention_heads, hidden_dim, num_heads, activation, window_size, shapes, batch_size, dropout_p, ): num_channels = num_heads * factor_attention_heads - block = TransformerProcessorBlock(num_channels, hidden_dim, num_heads, activation, window_size) + block = TransformerProcessorBlock( + num_channels, hidden_dim, num_heads, activation, window_size, dropout_p=dropout_p + ) x = torch.randn((batch_size, num_channels)) diff --git a/tests/layers/chunk/test_chunk_transformer.py b/tests/layers/chunk/test_chunk_transformer.py index 1fe7c6d..5449e97 100644 --- a/tests/layers/chunk/test_chunk_transformer.py +++ b/tests/layers/chunk/test_chunk_transformer.py @@ -20,6 +20,7 @@ def init(self): mlp_hidden_ratio: int = 4
activation: str = "GELU" window_size: int = 13 + dropout_p: float = 0.1 # num_channels must be evenly divisible by num_heads for MHSA return ( @@ -29,6 +30,7 @@ def init(self): mlp_hidden_ratio, activation, window_size, + dropout_p, ) @pytest.fixture @@ -40,6 +42,7 @@ def processor_chunk(self, init): mlp_hidden_ratio, activation, window_size, + dropout_p, ) = init return TransformerProcessorChunk( num_channels=num_channels, @@ -48,6 +51,7 @@ def processor_chunk(self, init): mlp_hidden_ratio=mlp_hidden_ratio, activation=activation, window_size=window_size, + dropout_p=dropout_p, ) def test_all_blocks(self, processor_chunk): diff --git a/tests/layers/processor/test_transformer_processor.py b/tests/layers/processor/test_transformer_processor.py index d359c27..305af41 100644 --- a/tests/layers/processor/test_transformer_processor.py +++ b/tests/layers/processor/test_transformer_processor.py @@ -21,6 +21,7 @@ def transformer_processor_init(): cpu_offload = False num_heads = 16 mlp_hidden_ratio = 4 + dropout_p = 0.1 return ( num_layers, window_size, @@ -30,6 +31,7 @@ def transformer_processor_init(): cpu_offload, num_heads, mlp_hidden_ratio, + dropout_p, ) @pytest.fixture def transformer_processor(transformer_processor_init): ( @@ -44,6 +46,7 @@ def transformer_processor(transformer_processor_init): cpu_offload, num_heads, mlp_hidden_ratio, + dropout_p, ) = transformer_processor_init return TransformerProcessor( num_layers=num_layers, @@ -54,6 +57,7 @@ def transformer_processor(transformer_processor_init): cpu_offload=cpu_offload, num_heads=num_heads, mlp_hidden_ratio=mlp_hidden_ratio, + dropout_p=dropout_p, ) @@ -67,6 +71,7 @@ def test_transformer_processor_init(transformer_processor, transformer_processor _cpu_offload, _num_heads, _mlp_hidden_ratio, + _dropout_p, ) = transformer_processor_init assert isinstance(transformer_processor, TransformerProcessor) assert transformer_processor.num_chunks == num_chunks @@ -84,6 +89,7 @@ def test_transformer_processor_forward(transformer_processor, transformer_proces _cpu_offload, _num_heads, _mlp_hidden_ratio, + _dropout_p, ) = transformer_processor_init gridsize = 100 batch_size = 1 diff --git a/tests/layers/test_attention.py b/tests/layers/test_attention.py index ffeaebc..9457317 100644 --- a/tests/layers/test_attention.py +++ b/tests/layers/test_attention.py @@ -18,17 +18,19 @@ @given( num_heads=st.integers(min_value=1, max_value=50), embed_dim_multiplier=st.integers(min_value=1, max_value=10), + dropout_p=st.floats(min_value=0.0, max_value=1.0), ) -def test_multi_head_self_attention_init(num_heads, embed_dim_multiplier): +def test_multi_head_self_attention_init(num_heads, embed_dim_multiplier, dropout_p): embed_dim = ( num_heads * embed_dim_multiplier ) # TODO: Make assert in MHSA to check if embed_dim is divisible by num_heads - mhsa = MultiHeadSelfAttention(num_heads, embed_dim) + mhsa = MultiHeadSelfAttention(num_heads, embed_dim, dropout_p=dropout_p) assert isinstance(mhsa, nn.Module) assert mhsa.num_heads == num_heads assert mhsa.embed_dim == embed_dim assert mhsa.head_dim == embed_dim // num_heads + assert dropout_p == mhsa.dropout_p @pytest.mark.gpu @given( batch_size=st.integers(min_value=1, max_value=64), num_heads=st.integers(min_value=1, max_value=20), embed_dim_multiplier=st.integers(min_value=1, max_value=10), + dropout_p=st.floats(min_value=0.0, max_value=1.0), ) @settings(deadline=None) def
test_multi_head_self_attention_forward(batch_size, num_heads, embed_dim_multiplier, dropout_p): embed_dim = num_heads * embed_dim_multiplier - mhsa = MultiHeadSelfAttention(num_heads, embed_dim) + mhsa = MultiHeadSelfAttention(num_heads, embed_dim, dropout_p=dropout_p) x = torch.randn(batch_size * 2, embed_dim) shapes = [list(x.shape)] @@ -54,10 +57,11 @@ def test_multi_head_self_attention_forward(batch_size, num_heads, embed_dim_mult batch_size=st.integers(min_value=1, max_value=64), num_heads=st.integers(min_value=1, max_value=20), embed_dim_multiplier=st.integers(min_value=1, max_value=10), + dropout_p=st.floats(min_value=0.0, max_value=1.0), ) -def test_multi_head_self_attention_backward(batch_size, num_heads, embed_dim_multiplier): +def test_multi_head_self_attention_backward(batch_size, num_heads, embed_dim_multiplier, dropout_p): embed_dim = num_heads * embed_dim_multiplier - mhsa = MultiHeadSelfAttention(num_heads, embed_dim) + mhsa = MultiHeadSelfAttention(num_heads, embed_dim, dropout_p=dropout_p) x = torch.randn(batch_size * 2, embed_dim, requires_grad=True) shapes = [list(x.shape)] From 90ef59c2abb41b74a273b6b8863a64bdb26fc7b6 Mon Sep 17 00:00:00 2001 From: gabrieloks <116646686+gabrieloks@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:55:09 +0100 Subject: [PATCH 12/14] activation functions added for bounded outputs (#14) * activation functions added for bounded outputs * generalised fraction normalisation * precommit changes * precommit changes * chore: mv bounding to post-processors * feat: make bounding strategies torch Module * refactor: pre-build indices during init * refactor: nn.module in place modification * refactor: mv bounding to layers * refactor: implement bounding ModuleList * docs: add changelog * refactor: mv bounding config to models * feat: enable 1-1 variable remapping in preprocessors * test: create tests and fix code * test: add a test for hydra instantiating of bounding * fix: naming * refactor: reduce verboseness * docs: add comments * fix: inject name_to_index on initiation * fixed reading of statistics remapping * revert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_preprocessor_normalizer.py * Update encoder_processor_decoder.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docs: added a comment on special keys * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docs: add docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: changelog --------- Co-authored-by: Jesper Dramsch --- CHANGELOG.md | 21 ++-- src/anemoi/models/layers/bounding.py | 115 ++++++++++++++++++ .../models/encoder_processor_decoder.py | 15 +++ src/anemoi/models/preprocessing/__init__.py | 20 ++- src/anemoi/models/preprocessing/normalizer.py | 22 ++++ tests/layers/test_bounding.py | 92 ++++++++++++++ .../test_preprocessor_normalizer.py | 43 +++++++ 7 files changed, 319 insertions(+), 9 deletions(-) create mode 100644 src/anemoi/models/layers/bounding.py create mode 100644 tests/layers/test_bounding.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ec4db72..7811ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,26 +10,31 @@ Keep it human-readable, your future self will thank you! 
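A minimal sketch of the configurable dropout added in PATCH 11, assuming the calling convention used by tests/layers/test_attention.py above (tensor shapes mirror the test fixtures; on CPU this exercises the non-flash-attention code path):

    import torch

    from anemoi.models.layers.attention import MultiHeadSelfAttention

    num_heads, embed_dim, batch_size = 4, 32, 2  # embed_dim must be divisible by num_heads
    mhsa = MultiHeadSelfAttention(num_heads, embed_dim, dropout_p=0.1)

    x = torch.randn(batch_size * 2, embed_dim)
    shapes = [list(x.shape)]
    out = mhsa(x, shapes, batch_size)  # stochastic: dropout is active in training mode

    # At inference the module evaluates `self.dropout_p if self.training else 0.0`,
    # so switching to eval mode makes the attention deterministic again:
    mhsa.eval()
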
## [Unreleased](https://github.com/ecmwf/anemoi-models/compare/0.3.0...HEAD) -## [0.3.0](https://github.com/ecmwf/anemoi-models/compare/0.2.1...0.3.0) - Remapping of (meteorological) Variables - ### Added - -- CI workflow to update the changelog on release -- configurability of the dropout probability in the MultiHeadSelfAttention module -- CI workflow to update the changelog on release -- Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. - Codeowners file - Pygrep precommit hooks - Docsig precommit hooks - Changelog merge strategy +- configurability of the dropout probability in the MultiHeadSelfAttention module +- Variable Bounding as configurable model layers [#13](https://github.com/ecmwf/anemoi-models/issues/13) + +### Changed +- Bugfixes for CI + +### Removed + +## [0.3.0](https://github.com/ecmwf/anemoi-models/compare/0.2.1...0.3.0) - Remapping of (meteorological) Variables +### Added + +- CI workflow to update the changelog on release +- Remapper: Preprocessor for remapping one variable to multiple ones. Includes changes to the data indices since the remapper changes the number of variables. With optional config keywords. ### Changed - Update CI to inherit from common infrastructure reusable workflows - run downstream-ci only when src and tests folders have changed - New error messages for wrong graphs. ### Removed diff --git a/src/anemoi/models/layers/bounding.py b/src/anemoi/models/layers/bounding.py new file mode 100644 index 0000000..3791ff2 --- /dev/null +++ b/src/anemoi/models/layers/bounding.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from abc import ABC +from abc import abstractmethod + +import torch +from torch import nn + +from anemoi.models.data_indices.tensor import InputTensorIndex + + +class BaseBounding(nn.Module, ABC): + """Abstract base class for bounding strategies. + + This class defines an interface for bounding strategies which are used to apply a specific + restriction to the predictions of a model. + """ + + def __init__( + self, + *, + variables: list[str], + name_to_index: dict, + ) -> None: + super().__init__() + + self.name_to_index = name_to_index + self.variables = variables + self.data_index = self._create_index(variables=self.variables) + + def _create_index(self, variables: list[str]) -> InputTensorIndex: + return InputTensorIndex(includes=variables, excludes=[], name_to_index=self.name_to_index)._only + + @abstractmethod + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Applies the bounding to the predictions. + + Parameters + ---------- + x : torch.Tensor + The tensor containing the predictions that will be bounded. + + Returns + ------- + torch.Tensor + A tensor with the bounding applied. + """ + pass + + +class ReluBounding(BaseBounding): + """Initializes the bounding with a ReLU activation / zero clamping.""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x[..., self.data_index] = torch.nn.functional.relu(x[..., self.data_index]) + return x + + +class HardtanhBounding(BaseBounding): + """Initializes the bounding with specified minimum and maximum values for bounding. + + Parameters + ---------- + variables : list[str] + A list of strings representing the variables that will be bounded. + name_to_index : dict + A dictionary mapping the variable names to their corresponding indices. + min_val : float + The minimum value for the HardTanh activation.
+ max_val : float + The maximum value for the HardTanh activation. + """ + + def __init__(self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float) -> None: + super().__init__(variables=variables, name_to_index=name_to_index) + self.min_val = min_val + self.max_val = max_val + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x[..., self.data_index] = torch.nn.functional.hardtanh( + x[..., self.data_index], min_val=self.min_val, max_val=self.max_val + ) + return x + + +class FractionBounding(HardtanhBounding): + """Initializes the FractionBounding with specified parameters. + + Parameters + ---------- + variables : list[str] + A list of strings representing the variables that will be bounded. + name_to_index : dict + A dictionary mapping the variable names to their corresponding indices. + min_val : float + The minimum value for the HardTanh activation. + max_val : float + The maximum value for the HardTanh activation. + total_var : str + A string representing a variable from which a secondary variable is derived. For + example, in the case of convective precipitation (Cp), total_var = Tp (total precipitation). + """ + + def __init__( + self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float, total_var: str + ) -> None: + super().__init__(variables=variables, name_to_index=name_to_index, min_val=min_val, max_val=max_val) + self.total_variable = self._create_index(variables=[total_var]) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # Apply the HardTanh bounding to the data_index variables + x = super().forward(x) + # Calculate the fraction of the total variable + x[..., self.data_index] *= x[..., self.total_variable] + return x diff --git a/src/anemoi/models/models/encoder_processor_decoder.py b/src/anemoi/models/models/encoder_processor_decoder.py index 3414dc5..b043b0c 100644 --- a/src/anemoi/models/models/encoder_processor_decoder.py +++ b/src/anemoi/models/models/encoder_processor_decoder.py @@ -67,6 +67,8 @@ def __init__( self._register_latlon("data", self._graph_name_data) self._register_latlon("hidden", self._graph_name_hidden) + self.data_indices = data_indices + self.num_channels = config.model.num_channels input_dim = self.multi_step * self.num_input_channels + self.latlons_data.shape[1] + self.trainable_data_size @@ -103,6 +105,14 @@ def __init__( dst_grid_size=self._data_grid_size, ) + # Instantiation of model output bounding functions (e.g., to ensure outputs like TP are positive definite) + self.boundings = nn.ModuleList( + [ + instantiate(cfg, name_to_index=self.data_indices.model.output.name_to_index) + for cfg in getattr(config.model, "bounding", []) + ] + ) + def _calculate_shapes_and_indices(self, data_indices: dict) -> None: self.num_input_channels = len(data_indices.internal_model.input) self.num_output_channels = len(data_indices.internal_model.output) @@ -251,4 +261,9 @@ def forward(self, x: Tensor, model_comm_group: Optional[ProcessGroup] = None) -> # residual connection (just for the prognostic variables) x_out[..., self._internal_output_idx] += x[:, -1, :, :, self._internal_input_idx] + + for bounding in self.boundings: + # bounding performed in the order specified in the config file + x_out = bounding(x_out) + return x_out diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py index 53017fb..cc2cb4f 100644 --- a/src/anemoi/models/preprocessing/__init__.py +++ b/src/anemoi/models/preprocessing/__init__.py @@ -38,7 +38,23 @@ def __init__( Data 
indices for input and output variables statistics : dict Data statistics dictionary + data_indices : dict + Data indices for input and output variables + + Attributes + ---------- + default : str + Default method for variables not specified in the config + method_config : dict + Dictionary of the methods with lists of variables + methods : dict + Dictionary of the variables with methods + data_indices : IndexCollection + Data indices for input and output variables + remap : dict + Dictionary of the variables with remapped names in the config """ + super().__init__() self.default, self.method_config = self._process_config(config) @@ -47,8 +63,10 @@ def __init__( self.data_indices = data_indices def _process_config(self, config): + _special_keys = ["default", "remap"] # Keys that do not contain a list of variables in a preprocessing method. default = config.get("default", "none") - method_config = {k: v for k, v in config.items() if k != "default" and v is not None and v != "none"} + self.remap = config.get("remap", {}) + method_config = {k: v for k, v in config.items() if k not in _special_keys and v is not None and v != "none"} if not method_config: LOGGER.warning( diff --git a/src/anemoi/models/preprocessing/normalizer.py b/src/anemoi/models/preprocessing/normalizer.py index bc75466..ee6a4f5 100644 --- a/src/anemoi/models/preprocessing/normalizer.py +++ b/src/anemoi/models/preprocessing/normalizer.py @@ -49,6 +49,16 @@ def __init__( mean = statistics["mean"] stdev = statistics["stdev"] + # Optionally reuse statistic of one variable for another variable + statistics_remap = {} + for remap, source in self.remap.items(): + idx_src, idx_remap = name_to_index_training_input[source], name_to_index_training_input[remap] + statistics_remap[idx_remap] = (minimum[idx_src], maximum[idx_src], mean[idx_src], stdev[idx_src]) + + # Two-step to avoid overwriting the original statistics in the loop (this reduces dependence on order) + for idx, new_stats in statistics_remap.items(): + minimum[idx], maximum[idx], mean[idx], stdev[idx] = new_stats + self._validate_normalization_inputs(name_to_index_training_input, minimum, maximum, mean, stdev) _norm_add = np.zeros((minimum.size,), dtype=np.float32) @@ -56,6 +66,7 @@ def __init__( for name, i in name_to_index_training_input.items(): method = self.methods.get(name, self.default) + if method == "mean-std": LOGGER.debug(f"Normalizing: {name} is mean-std-normalised.") if stdev[i] < (mean[i] * 1e-6): @@ -63,6 +74,13 @@ def __init__( _norm_mul[i] = 1 / stdev[i] _norm_add[i] = -mean[i] / stdev[i] + elif method == "std": + LOGGER.debug(f"Normalizing: {name} is std-normalised.") + if stdev[i] < (mean[i] * 1e-6): + warnings.warn(f"Normalizing: the field seems to have only one value {mean[i]}") + _norm_mul[i] = 1 / stdev[i] + _norm_add[i] = 0 + elif method == "min-max": LOGGER.debug(f"Normalizing: {name} is min-max-normalised to [0, 1].") x = maximum[i] - minimum[i] @@ -92,16 +110,20 @@ def _validate_normalization_inputs(self, name_to_index_training_input: dict, min f"Error parsing methods in InputNormalizer methods ({len(self.methods)}) " f"and entries in config ({sum(len(v) for v in self.method_config)}) do not match." 
) + + # Check that all sizes align n = minimum.size assert maximum.size == n, (maximum.size, n) assert mean.size == n, (mean.size, n) assert stdev.size == n, (stdev.size, n) + # Check for typos in method config assert isinstance(self.methods, dict) for name, method in self.methods.items(): assert name in name_to_index_training_input, f"{name} is not a valid variable name" assert method in [ "mean-std", + "std", # "robust", "min-max", "max", diff --git a/tests/layers/test_bounding.py b/tests/layers/test_bounding.py new file mode 100644 index 0000000..87619cd --- /dev/null +++ b/tests/layers/test_bounding.py @@ -0,0 +1,92 @@ +import pytest +import torch +from anemoi.utils.config import DotDict +from hydra.utils import instantiate + +from anemoi.models.layers.bounding import FractionBounding +from anemoi.models.layers.bounding import HardtanhBounding +from anemoi.models.layers.bounding import ReluBounding + + +@pytest.fixture +def config(): + return DotDict({"variables": ["var1", "var2"], "total_var": "total_var"}) + + +@pytest.fixture +def name_to_index(): + return {"var1": 0, "var2": 1, "total_var": 2} + + +@pytest.fixture +def input_tensor(): + return torch.tensor([[-1.0, 2.0, 3.0], [4.0, -5.0, 6.0], [0.5, 0.5, 0.5]]) + + +def test_relu_bounding(config, name_to_index, input_tensor): + bounding = ReluBounding(variables=config.variables, name_to_index=name_to_index) + output = bounding(input_tensor.clone()) + expected_output = torch.tensor([[0.0, 2.0, 3.0], [4.0, 0.0, 6.0], [0.5, 0.5, 0.5]]) + assert torch.equal(output, expected_output) + + +def test_hardtanh_bounding(config, name_to_index, input_tensor): + minimum, maximum = -1.0, 1.0 + bounding = HardtanhBounding( + variables=config.variables, name_to_index=name_to_index, min_val=minimum, max_val=maximum + ) + output = bounding(input_tensor.clone()) + expected_output = torch.tensor([[minimum, maximum, 3.0], [maximum, minimum, 6.0], [0.5, 0.5, 0.5]]) + assert torch.equal(output, expected_output) + + +def test_fraction_bounding(config, name_to_index, input_tensor): + bounding = FractionBounding( + variables=config.variables, name_to_index=name_to_index, min_val=0.0, max_val=1.0, total_var=config.total_var + ) + output = bounding(input_tensor.clone()) + expected_output = torch.tensor([[0.0, 3.0, 3.0], [6.0, 0.0, 6.0], [0.25, 0.25, 0.5]]) + + assert torch.equal(output, expected_output) + + +def test_multi_chained_bounding(config, name_to_index, input_tensor): + # Apply Relu first on the first variable only + bounding1 = ReluBounding(variables=config.variables[:-1], name_to_index=name_to_index) + expected_output = torch.tensor([[0.0, 2.0, 3.0], [4.0, -5.0, 6.0], [0.5, 0.5, 0.5]]) + # Check intermediate result + assert torch.equal(bounding1(input_tensor.clone()), expected_output) + minimum, maximum = 0.5, 1.75 + bounding2 = HardtanhBounding( + variables=config.variables, name_to_index=name_to_index, min_val=minimum, max_val=maximum + ) + # Use full chaining on the input tensor + output = bounding2(bounding1(input_tensor.clone())) + # Data with Relu applied first and then Hardtanh + expected_output = torch.tensor([[minimum, maximum, 3.0], [maximum, minimum, 6.0], [0.5, 0.5, 0.5]]) + assert torch.equal(output, expected_output) + + +def test_hydra_instantiate_bounding(config, name_to_index, input_tensor): + layer_definitions = [ + { + "_target_": "anemoi.models.layers.bounding.ReluBounding", + "variables": config.variables, + }, + { + "_target_": "anemoi.models.layers.bounding.HardtanhBounding", + "variables": config.variables, + "min_val": 0.0,
"max_val": 1.0, + }, + { + "_target_": "anemoi.models.layers.bounding.FractionBounding", + "variables": config.variables, + "min_val": 0.0, + "max_val": 1.0, + "total_var": config.total_var, + }, + ] + for layer_definition in layer_definitions: + bounding = instantiate(layer_definition, name_to_index=name_to_index) + bounding(input_tensor.clone()) diff --git a/tests/preprocessing/test_preprocessor_normalizer.py b/tests/preprocessing/test_preprocessor_normalizer.py index cc527e7..8056865 100644 --- a/tests/preprocessing/test_preprocessor_normalizer.py +++ b/tests/preprocessing/test_preprocessor_normalizer.py @@ -40,6 +40,37 @@ def input_normalizer(): return InputNormalizer(config=config.data.normalizer, data_indices=data_indices, statistics=statistics) +@pytest.fixture() +def remap_normalizer(): + config = DictConfig( + { + "diagnostics": {"log": {"code": {"level": "DEBUG"}}}, + "data": { + "normalizer": { + "default": "mean-std", + "remap": {"x": "z", "y": "x"}, + "min-max": ["x"], + "max": ["y"], + "none": ["z"], + "mean-std": ["q"], + }, + "forcing": ["z", "q"], + "diagnostic": ["other"], + "remapped": {}, + }, + }, + ) + statistics = { + "mean": np.array([1.0, 2.0, 3.0, 4.5, 3.0]), + "stdev": np.array([0.5, 0.5, 0.5, 1, 14]), + "minimum": np.array([1.0, 1.0, 1.0, 1.0, 1.0]), + "maximum": np.array([11.0, 10.0, 10.0, 10.0, 10.0]), + } + name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "other": 4} + data_indices = IndexCollection(config=config, name_to_index=name_to_index) + return InputNormalizer(config=config.data.normalizer, statistics=statistics, data_indices=data_indices) + + def test_normalizer_not_inplace(input_normalizer) -> None: x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]]) input_normalizer(x, in_place=False) @@ -87,3 +118,15 @@ def test_normalize_inverse_transform(input_normalizer) -> None: assert torch.allclose( input_normalizer.inverse_transform(input_normalizer.transform(x, in_place=False), in_place=False), x ) + + +def test_normalizer_not_inplace_remap(remap_normalizer) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]]) + remap_normalizer(x, in_place=False) + assert torch.allclose(x, torch.Tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]])) + + +def test_normalize_remap(remap_normalizer) -> None: + x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]]) + expected_output = torch.Tensor([[0.0, 2 / 11, 3.0, -0.5, 1 / 7], [5 / 9, 7 / 11, 8.0, 4.5, 0.5]]) + assert torch.allclose(remap_normalizer.transform(x), expected_output) From e608a736fe0fa6d01a8a11a2c96072e116c2877a Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Mon, 23 Sep 2024 16:52:14 +0200 Subject: [PATCH 13/14] [Feature] 28 Make models switchable through the config (#45) * feat: make model instantiateable * docs: instantiation explained in changelog * refactor: rename model config object * fix: rename config to model_config * fix: mark non-recursive * docs: changelog --- CHANGELOG.md | 1 + src/anemoi/models/interface/__init__.py | 11 +++++---- .../models/encoder_processor_decoder.py | 24 +++++++++---------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7811ef3..5678486 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Keep it human-readable, your future self will thank you! - Update CI to inherit from common infrastructue reusable workflows - run downstream-ci only when src and tests folders have changed - New error messages for wrongs graphs. 
+- Feature: Change model to be instantiatable in the interface, addressing [#28](https://github.com/ecmwf/anemoi-models/issues/28) through [#45](https://github.com/ecmwf/anemoi-models/pulls/45) ### Removed diff --git a/src/anemoi/models/interface/__init__.py b/src/anemoi/models/interface/__init__.py index 626940f..aba62a2 100644 --- a/src/anemoi/models/interface/__init__.py +++ b/src/anemoi/models/interface/__init__.py @@ -14,7 +14,6 @@ from hydra.utils import instantiate from torch_geometric.data import HeteroData -from anemoi.models.models.encoder_processor_decoder import AnemoiModelEncProcDec from anemoi.models.preprocessing import Processors @@ -73,9 +72,13 @@ def _build_model(self) -> None: self.pre_processors = Processors(processors) self.post_processors = Processors(processors, inverse=True) - # Instantiate the model (Can be generalised to other models in the future, here we use AnemoiModelEncProcDec) - self.model = AnemoiModelEncProcDec( - config=self.config, data_indices=self.data_indices, graph_data=self.graph_data + # Instantiate the model + self.model = instantiate( + self.config.model.model, + model_config=self.config, + data_indices=self.data_indices, + graph_data=self.graph_data, + _recursive_=False, # Disables recursive instantiation by Hydra ) # Use the forward method of the model directly diff --git a/src/anemoi/models/models/encoder_processor_decoder.py b/src/anemoi/models/models/encoder_processor_decoder.py index b043b0c..aa7e8bb 100644 --- a/src/anemoi/models/models/encoder_processor_decoder.py +++ b/src/anemoi/models/models/encoder_processor_decoder.py @@ -32,7 +32,7 @@ class AnemoiModelEncProcDec(nn.Module): def __init__( self, *, - config: DotDict, + model_config: DotDict, data_indices: dict, graph_data: HeteroData, ) -> None: @@ -40,8 +40,8 @@ def __init__( Parameters ---------- - config : DotDict - Job configuration + model_config : DotDict + Model configuration data_indices : dict Data indices graph_data : HeteroData @@ -50,15 +50,15 @@ def __init__( super().__init__() self._graph_data = graph_data - self._graph_name_data = config.graph.data - self._graph_name_hidden = config.graph.hidden + self._graph_name_data = model_config.graph.data + self._graph_name_hidden = model_config.graph.hidden self._calculate_shapes_and_indices(data_indices) self._assert_matching_indices(data_indices) - self.multi_step = config.training.multistep_input + self.multi_step = model_config.training.multistep_input - self._define_tensor_sizes(config) + self._define_tensor_sizes(model_config) # Create trainable tensors self._create_trainable_attributes() @@ -69,13 +69,13 @@ def __init__( self.data_indices = data_indices - self.num_channels = config.model.num_channels + self.num_channels = model_config.model.num_channels input_dim = self.multi_step * self.num_input_channels + self.latlons_data.shape[1] + self.trainable_data_size # Encoder data -> hidden self.encoder = instantiate( - config.model.encoder, + model_config.model.encoder, in_channels_src=input_dim, in_channels_dst=self.latlons_hidden.shape[1] + self.trainable_hidden_size, hidden_dim=self.num_channels, @@ -86,7 +86,7 @@ def __init__( # Processor hidden -> hidden self.processor = instantiate( - config.model.processor, + model_config.model.processor, num_channels=self.num_channels, sub_graph=self._graph_data[(self._graph_name_hidden, "to", self._graph_name_hidden)], src_grid_size=self._hidden_grid_size, @@ -95,7 +95,7 @@ def __init__( # Decoder hidden -> data self.decoder = instantiate( - config.model.decoder, + 
model_config.model.decoder, in_channels_src=self.num_channels, in_channels_dst=input_dim, hidden_dim=self.num_channels, @@ -109,7 +109,7 @@ def __init__( self.boundings = nn.ModuleList( [ instantiate(cfg, name_to_index=self.data_indices.model.output.name_to_index) - for cfg in getattr(config.model, "bounding", []) + for cfg in getattr(model_config.model, "bounding", []) ] ) From 6e623b983179d6228d48abce2f1ddf18defc988d Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Tue, 24 Sep 2024 19:04:28 +0200 Subject: [PATCH 14/14] ci: fix publish and PR workflow (#48) --- .github/workflows/python-publish.yml | 40 ++++------------------- .github/workflows/python-pull-request.yml | 19 +++++++++++ 2 files changed, 26 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/python-pull-request.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index de01bf6..2cb554a 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -4,48 +4,22 @@ name: Upload Python Package on: - - push: {} - release: types: [created] jobs: quality: - name: Code QA - runs-on: ubuntu-latest - steps: - - run: sudo apt-get install -y pandoc # Needed by sphinx for notebooks - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.x - - uses: pre-commit/action@v3.0.1 + uses: ecmwf-actions/reusable-workflows/.github/workflows/qa-precommit-run.yml@v2 + with: + skip-hooks: "no-commit-to-branch" checks: strategy: - fail-fast: false matrix: - platform: ["ubuntu-latest", "macos-latest"] - python-version: ["3.10"] - - name: Python ${{ matrix.python-version }} on ${{ matrix.platform }} - runs-on: ${{ matrix.platform }} - - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install - run: | - pip install -e .[all,tests] - pip freeze - - - name: Tests - run: pytest + python-version: ["3.9", "3.10"] + uses: ecmwf-actions/reusable-workflows/.github/workflows/qa-pytest-pyproject.yml@v2 + with: + python-version: ${{ matrix.python-version }} deploy: needs: [checks, quality] diff --git a/.github/workflows/python-pull-request.yml b/.github/workflows/python-pull-request.yml new file mode 100644 index 0000000..3488f55 --- /dev/null +++ b/.github/workflows/python-pull-request.yml @@ -0,0 +1,19 @@ +--- +# This workflow runs code quality checks and tests on pushes and pull requests +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Code Quality checks for PRs + +on: + push: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + quality: + uses: ecmwf-actions/reusable-workflows/.github/workflows/qa-precommit-run.yml@v2 + with: + skip-hooks: "no-commit-to-branch" + + checks: + uses: ecmwf-actions/reusable-workflows/.github/workflows/qa-pytest-pyproject.yml@v2
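As a closing illustration: the config-driven model selection introduced in PATCH 13 relies on hydra's instantiate resolving a _target_ path from the config. A minimal, self-contained sketch of that pattern, using torch.nn.Linear as a stand-in target rather than an actual anemoi model class:

    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    # Stand-in for the `model.model` config section that AnemoiModelInterface passes through
    cfg = OmegaConf.create({"_target_": "torch.nn.Linear", "in_features": 4, "out_features": 2})

    # _recursive_=False mirrors the interface code: hydra is told not to
    # instantiate nested config sections on its own
    layer = instantiate(cfg, _recursive_=False)
    assert type(layer).__name__ == "Linear"
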