From 38d78c6642767a9e63db32b5209ab84b8d47c282 Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Mon, 23 Sep 2024 14:10:39 +0000
Subject: [PATCH 01/10] make preprocessors iterable

---
 src/anemoi/models/preprocessing/__init__.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py
index 53017fbd..67db7ce9 100644
--- a/src/anemoi/models/preprocessing/__init__.py
+++ b/src/anemoi/models/preprocessing/__init__.py
@@ -137,6 +137,18 @@ def __init__(self, processors: list, inverse: bool = False) -> None:
     def __repr__(self) -> str:
         return f"{self.__class__.__name__} [{'inverse' if self.inverse else 'forward'}]({self.processors})"
 
+    def __iter__(self):
+        return iter(self.processors)
+
+    def __getitem__(self, key):
+        return self.processors[key]
+
+    def __setitem__(self, key, value):
+        self.processors[key] = value
+
+    def __len__(self):
+        return len(self.processors)
+
     def forward(self, x, in_place: bool = True) -> Tensor:
         """Process the input tensor.
 

From e0c6067f83af6131c1f982c6ef5e3ced7dd58877 Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Mon, 23 Sep 2024 14:11:33 +0000
Subject: [PATCH 02/10] feat: calculate nan mask for loss function in imputer
 forward pass

---
 src/anemoi/models/preprocessing/imputer.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py
index 6ef5adbe..1c30fc8b 100644
--- a/src/anemoi/models/preprocessing/imputer.py
+++ b/src/anemoi/models/preprocessing/imputer.py
@@ -42,6 +42,8 @@ def __init__(
         super().__init__(config, data_indices, statistics)
 
         self.nan_locations = None
+        # weight imputed values wiht zero in loss calculation
+        self.loss_weights_training = None
 
     def _validate_indices(self):
         assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.replacement), (
@@ -117,6 +119,15 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
         # Choose correct index based on number of variables
         if x.shape[-1] == self.num_training_input_vars:
             index = self.index_training_input
+            if self.loss_weights_training is None:
+                self.loss_weights_training = torch.ones(
+                    (x.shape[-2], len(self.data_indices.data.output.name_to_index)), device=x.device
+                )  # shape (grid, n_outputs)
+                # for all variables that are imputed and part of the output, set the loss weight to zero
+                for idx_src, idx_dst in zip(self.index_training_input, self.index_training_output):
+                    if idx_dst is not None:
+                        self.loss_weights_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int()
+
         elif x.shape[-1] == self.num_inference_input_vars:
             index = self.index_inference_input
         else:

From cf22b5edbaaed4ff70c52d3c9238bc19214f0ec4 Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Thu, 26 Sep 2024 19:17:54 +0000
Subject: [PATCH 03/10] remove iterators from baseprocessors

---
 src/anemoi/models/preprocessing/__init__.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py
index 67db7ce9..53017fbd 100644
--- a/src/anemoi/models/preprocessing/__init__.py
+++ b/src/anemoi/models/preprocessing/__init__.py
@@ -137,18 +137,6 @@ def __init__(self, processors: list, inverse: bool = False) -> None:
     def __repr__(self) -> str:
         return f"{self.__class__.__name__} [{'inverse' if self.inverse else 'forward'}]({self.processors})"
 
-    def __iter__(self):
-        return iter(self.processors)
-
-    def __getitem__(self, key):
-        return self.processors[key]
-
-    def __setitem__(self, key, value):
-        self.processors[key] = value
-
-    def __len__(self):
-        return len(self.processors)
-
     def forward(self, x, in_place: bool = True) -> Tensor:
         """Process the input tensor.
 

From fa16cb20b4c53bcd9bb9db71721f1c114598bf4a Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Fri, 27 Sep 2024 12:39:01 +0000
Subject: [PATCH 04/10] transform loss nan mask in remapper

---
 src/anemoi/models/preprocessing/imputer.py  | 23 ++++++++--------
 src/anemoi/models/preprocessing/remapper.py | 29 +++++++++++++++++++++
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py
index 1c30fc8b..78bf101e 100644
--- a/src/anemoi/models/preprocessing/imputer.py
+++ b/src/anemoi/models/preprocessing/imputer.py
@@ -43,7 +43,7 @@ def __init__(
 
         self.nan_locations = None
         # weight imputed values wiht zero in loss calculation
-        self.loss_weights_training = None
+        self.loss_mask_training = None
 
     def _validate_indices(self):
         assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.replacement), (
@@ -110,24 +110,25 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
         if not in_place:
             x = x.clone()
 
-        # Initilialize mask once
+        # Initilialize nan mask once
         if self.nan_locations is None:
             # The mask is only saved for the last two dimensions (grid, variable)
             idx = [slice(0, 1)] * (x.ndim - 2) + [slice(None), slice(None)]
             self.nan_locations = torch.isnan(x[idx].squeeze())
 
+        # Initialize training loss mask to weigh imputed values with zeroes once
+        if self.loss_mask_training is None:
+            self.loss_mask_training = torch.ones(
+                (x.shape[-2], len(self.data_indices.model.output.name_to_index)), device=x.device
+            )  # shape (grid, n_outputs)
+            # for all variables that are imputed and part of the model output, set the loss weight to zero
+            for idx_src, idx_dst in zip(self.index_training_input, self.index_inference_output):
+                if idx_dst is not None:
+                    self.loss_mask_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int()
+
         # Choose correct index based on number of variables
         if x.shape[-1] == self.num_training_input_vars:
             index = self.index_training_input
-            if self.loss_weights_training is None:
-                self.loss_weights_training = torch.ones(
-                    (x.shape[-2], len(self.data_indices.data.output.name_to_index)), device=x.device
-                )  # shape (grid, n_outputs)
-                # for all variables that are imputed and part of the output, set the loss weight to zero
-                for idx_src, idx_dst in zip(self.index_training_input, self.index_training_output):
-                    if idx_dst is not None:
-                        self.loss_weights_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int()
-
         elif x.shape[-1] == self.num_inference_input_vars:
             index = self.index_inference_input
         else:
diff --git a/src/anemoi/models/preprocessing/remapper.py b/src/anemoi/models/preprocessing/remapper.py
index a79e2af8..a1cf95b2 100644
--- a/src/anemoi/models/preprocessing/remapper.py
+++ b/src/anemoi/models/preprocessing/remapper.py
@@ -223,6 +223,35 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
 
         return x_remapped
 
+    def transform_loss_mask(self, mask: torch.Tensor) -> torch.Tensor:
+        """Remap the loss mask.
+
+        ```
+        mask : torch.Tensor
+            Loss mask
+        ```
+        """
+        # use indices at model output level
+        index = self.index_inference_backmapped_output
+        indices_remapped = self.index_inference_output
+        indices_keep = self.indices_keep_inference_output
+
+        # create new loss mask with target number of columns
+        mask_remapped = torch.zeros(
+            mask.shape[:-1] + (mask.shape[-1] + len(indices_remapped),), dtype=mask.dtype, device=mask.device
+        )
+
+        # copy loss mask for variables that are not remapped
+        mask_remapped[..., : len(indices_keep)] = mask[..., indices_keep]
+
+        # remap loss mask for rest of variables
+        for idx_src, idx_dst in zip(indices_remapped, index):
+            if idx_dst is not None:
+                for jj, ii in enumerate(idx_dst):
+                    mask_remapped[..., ii] = mask[..., idx_src]
+
+        return mask_remapped
+
     def inverse_transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
         """Convert and remap the output tensor.
 

From 3551992f2ae3ce7ca0096b893960a8b4492de348 Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Fri, 27 Sep 2024 14:31:25 +0000
Subject: [PATCH 05/10] use internal model indices for bounding

---
 src/anemoi/models/models/encoder_processor_decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anemoi/models/models/encoder_processor_decoder.py b/src/anemoi/models/models/encoder_processor_decoder.py
index aa7e8bb6..c77db6e8 100644
--- a/src/anemoi/models/models/encoder_processor_decoder.py
+++ b/src/anemoi/models/models/encoder_processor_decoder.py
@@ -108,7 +108,7 @@ def __init__(
         # Instantiation of model output bounding functions (e.g., to ensure outputs like TP are positive definite)
         self.boundings = nn.ModuleList(
             [
-                instantiate(cfg, name_to_index=self.data_indices.model.output.name_to_index)
+                instantiate(cfg, name_to_index=self.data_indices.internal_model.output.name_to_index)
                 for cfg in getattr(model_config.model, "bounding", [])
             ]
         )

From 87647b7c8f4588623acd0ba431f37610d4236b7d Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Wed, 2 Oct 2024 14:43:55 +0000
Subject: [PATCH 06/10] changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4798ac9a..47303ce6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ Keep it human-readable, your future self will thank you!
 - configurabilty of the dropout probability in the the MultiHeadSelfAttention module
 - Variable Bounding as configurable model layers [#13](https://github.com/ecmwf/anemoi-models/issues/13)
 - GraphTransformerMapperBlock chunking to reduce memory usage during inference [#46](https://github.com/ecmwf/anemoi-models/pull/46)
+- Mask NaN values in training loss function [#271](https://github.com/ecmwf-lab/aifs-mono/issues/271)
 
 ### Changed
 - Bugfixes for CI

From 29669a6b1980cf277d7a7e6733e358e8f2d016f6 Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Wed, 13 Nov 2024 09:02:22 +0000
Subject: [PATCH 07/10] tests

---
 .../test_preprocessor_imputer.py              | 20 ++++++++++
 .../test_preprocessor_remapper.py             | 40 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/tests/preprocessing/test_preprocessor_imputer.py b/tests/preprocessing/test_preprocessor_imputer.py
index a22e261c..5d1035eb 100644
--- a/tests/preprocessing/test_preprocessor_imputer.py
+++ b/tests/preprocessing/test_preprocessor_imputer.py
@@ -297,6 +297,26 @@ def test_mask_saving(imputer_fixture, data_fixture, request):
     assert torch.equal(imputer.nan_locations, expected_mask), "Mask not saved correctly after first run."
 
 
+@pytest.mark.parametrize(
+    ("imputer_fixture", "data_fixture"),
+    [
+        ("default_constant_imputer", "default_constant_data"),
+        ("non_default_constant_imputer", "non_default_constant_data"),
+        ("default_input_imputer", "default_input_data"),
+        ("non_default_input_imputer", "non_default_input_data"),
+    ],
+)
+def test_loss_nan_mask(imputer_fixture, data_fixture, request):
+    """Check that the imputer correctly calculates the NaN mask for loss function scaling."""
+    x, _ = request.getfixturevalue(data_fixture)
+    expected = torch.tensor([[1.0, 1.0, 1.0], [1.0, 0.0, 1.0]])  # only prognostic and diagnostic variables
+    imputer = request.getfixturevalue(imputer_fixture)
+    imputer.transform(x)
+    assert torch.allclose(
+        imputer.loss_mask_training, expected
+    ), "Transform does not calculate NaN-mask for loss function scaling correctly."
+
+
 @pytest.mark.parametrize(
     ("imputer_fixture", "data_fixture"),
     [
diff --git a/tests/preprocessing/test_preprocessor_remapper.py b/tests/preprocessing/test_preprocessor_remapper.py
index a0ece2a3..bbb3a168 100644
--- a/tests/preprocessing/test_preprocessor_remapper.py
+++ b/tests/preprocessing/test_preprocessor_remapper.py
@@ -8,11 +8,13 @@
 # nor does it submit to any jurisdiction.
 
 
+import numpy as np
 import pytest
 import torch
 from omegaconf import DictConfig
 
 from anemoi.models.data_indices.collection import IndexCollection
+from anemoi.models.preprocessing.imputer import InputImputer
 from anemoi.models.preprocessing.remapper import Remapper
 
 
@@ -41,6 +43,34 @@ def input_remapper():
     return Remapper(config=config.data.remapper, data_indices=data_indices, statistics=statistics)
 
 
+@pytest.fixture()
+def input_imputer():
+    config = DictConfig(
+        {
+            "diagnostics": {"log": {"code": {"level": "DEBUG"}}},
+            "data": {
+                "remapper": {
+                    "cos_sin": {
+                        "d": ["cos_d", "sin_d"],
+                    }
+                },
+                "imputer": {"default": "none", "mean": ["y", "d"]},
+                "forcing": ["z", "q"],
+                "diagnostic": ["other"],
+                "remapped": {
+                    "d": ["cos_d", "sin_d"],
+                },
+            },
+        },
+    )
+    statistics = {
+        "mean": np.array([1.0, 2.0, 3.0, 4.5, 3.0, 1.0]),
+    }
+    name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "d": 4, "other": 5}
+    data_indices = IndexCollection(config=config, name_to_index=name_to_index)
+    return InputImputer(config=config.data.imputer, data_indices=data_indices, statistics=statistics)
+
+
 def test_remap_not_inplace(input_remapper) -> None:
     x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]])
     input_remapper(x, in_place=False)
@@ -66,3 +96,13 @@ def test_remap_inverse_transform(input_remapper) -> None:
     assert torch.allclose(
         input_remapper.inverse_transform(input_remapper.transform(x, in_place=False), in_place=False), x
     )
+
+
+def test_transform_loss_mask(input_imputer, input_remapper) -> None:
+    x = torch.Tensor([[1.0, np.nan, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, np.nan, 10.0]])
+    expected_output = torch.Tensor([[1.0, 0.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 0.0, 0.0]])
+    input_imputer.transform(x)
+    input_remapper.transform(x)
+    loss_mask_training = input_imputer.loss_mask_training
+    loss_mask_training = input_remapper.transform_loss_mask(loss_mask_training)
+    assert torch.allclose(loss_mask_training, expected_output)

From 94f0d52c5812feb8c9c86af302d9e7cc4e1da58c Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Wed, 13 Nov 2024 09:45:25 +0000
Subject: [PATCH 08/10] remove obsolete enumerate

---
 src/anemoi/models/preprocessing/remapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anemoi/models/preprocessing/remapper.py b/src/anemoi/models/preprocessing/remapper.py
index 891056cb..7cb15a66 100644
--- a/src/anemoi/models/preprocessing/remapper.py
+++ b/src/anemoi/models/preprocessing/remapper.py
@@ -248,7 +248,7 @@ def transform_loss_mask(self, mask: torch.Tensor) -> torch.Tensor:
         # remap loss mask for rest of variables
         for idx_src, idx_dst in zip(indices_remapped, index):
             if idx_dst is not None:
-                for jj, ii in enumerate(idx_dst):
+                for ii in idx_dst:
                     mask_remapped[..., ii] = mask[..., idx_src]
 
         return mask_remapped

From fcc4bcd9e60433940fabdb18e4318afc49dd6826 Mon Sep 17 00:00:00 2001
From: Sara Hahner <44293258+sahahner@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:38:52 +0100
Subject: [PATCH 09/10] Update src/anemoi/models/preprocessing/imputer.py

Co-authored-by: Harrison Cook <Harrison.cook@ecmwf.int>
---
 src/anemoi/models/preprocessing/imputer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py
index 950a5958..2bd1bed4 100644
--- a/src/anemoi/models/preprocessing/imputer.py
+++ b/src/anemoi/models/preprocessing/imputer.py
@@ -111,7 +111,7 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
         if not in_place:
             x = x.clone()
 
-        # Initilialize nan mask once
+        # Initialize nan mask once
         if self.nan_locations is None:
             # The mask is only saved for the last two dimensions (grid, variable)
             idx = [slice(0, 1)] * (x.ndim - 2) + [slice(None), slice(None)]

From 7aefe568d4666e67c33d28c6ef19bb412c61c9dd Mon Sep 17 00:00:00 2001
From: sahahner <hahner.sa@gmail.com>
Date: Mon, 18 Nov 2024 14:11:07 +0000
Subject: [PATCH 10/10] calculate nan mask and loss mask together

---
 src/anemoi/models/preprocessing/imputer.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py
index 950a5958..fd1a7f5f 100644
--- a/src/anemoi/models/preprocessing/imputer.py
+++ b/src/anemoi/models/preprocessing/imputer.py
@@ -117,8 +117,7 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor:
             idx = [slice(0, 1)] * (x.ndim - 2) + [slice(None), slice(None)]
             self.nan_locations = torch.isnan(x[idx].squeeze())
 
-        # Initialize training loss mask to weigh imputed values with zeroes once
-        if self.loss_mask_training is None:
+            # Initialize training loss mask to weigh imputed values with zeroes once
             self.loss_mask_training = torch.ones(
                 (x.shape[-2], len(self.data_indices.model.output.name_to_index)), device=x.device
             )  # shape (grid, n_outputs)