From 38d78c6642767a9e63db32b5209ab84b8d47c282 Mon Sep 17 00:00:00 2001 From: sahahner Date: Mon, 23 Sep 2024 14:10:39 +0000 Subject: [PATCH 01/10] make preprocessors iterable --- src/anemoi/models/preprocessing/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py index 53017fbd..67db7ce9 100644 --- a/src/anemoi/models/preprocessing/__init__.py +++ b/src/anemoi/models/preprocessing/__init__.py @@ -137,6 +137,18 @@ def __init__(self, processors: list, inverse: bool = False) -> None: def __repr__(self) -> str: return f"{self.__class__.__name__} [{'inverse' if self.inverse else 'forward'}]({self.processors})" + def __iter__(self): + return iter(self.processors) + + def __getitem__(self, key): + return self.processors[key] + + def __setitem__(self, key, value): + self.processors[key] = value + + def __len__(self): + return len(self.processors) + def forward(self, x, in_place: bool = True) -> Tensor: """Process the input tensor. 
From e0c6067f83af6131c1f982c6ef5e3ced7dd58877 Mon Sep 17 00:00:00 2001 From: sahahner Date: Mon, 23 Sep 2024 14:11:33 +0000 Subject: [PATCH 02/10] feat: calculate nan mask for loss function in imputer forward pass --- src/anemoi/models/preprocessing/imputer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py index 6ef5adbe..1c30fc8b 100644 --- a/src/anemoi/models/preprocessing/imputer.py +++ b/src/anemoi/models/preprocessing/imputer.py @@ -42,6 +42,8 @@ def __init__( super().__init__(config, data_indices, statistics) self.nan_locations = None + # weight imputed values with zero in loss calculation + self.loss_weights_training = None def _validate_indices(self): assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.replacement), ( @@ -117,6 +119,15 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: # Choose correct index based on number of variables if x.shape[-1] == self.num_training_input_vars: index = self.index_training_input + if self.loss_weights_training is None: + self.loss_weights_training = torch.ones( + (x.shape[-2], len(self.data_indices.data.output.name_to_index)), device=x.device + ) # shape (grid, n_outputs) + # for all variables that are imputed and part of the output, set the loss weight to zero + for idx_src, idx_dst in zip(self.index_training_input, self.index_training_output): + if idx_dst is not None: + self.loss_weights_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int() + elif x.shape[-1] == self.num_inference_input_vars: index = self.index_inference_input else: From cf22b5edbaaed4ff70c52d3c9238bc19214f0ec4 Mon Sep 17 00:00:00 2001 From: sahahner Date: Thu, 26 Sep 2024 19:17:54 +0000 Subject: [PATCH 03/10] remove iterators from baseprocessors --- src/anemoi/models/preprocessing/__init__.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git 
a/src/anemoi/models/preprocessing/__init__.py b/src/anemoi/models/preprocessing/__init__.py index 67db7ce9..53017fbd 100644 --- a/src/anemoi/models/preprocessing/__init__.py +++ b/src/anemoi/models/preprocessing/__init__.py @@ -137,18 +137,6 @@ def __init__(self, processors: list, inverse: bool = False) -> None: def __repr__(self) -> str: return f"{self.__class__.__name__} [{'inverse' if self.inverse else 'forward'}]({self.processors})" - def __iter__(self): - return iter(self.processors) - - def __getitem__(self, key): - return self.processors[key] - - def __setitem__(self, key, value): - self.processors[key] = value - - def __len__(self): - return len(self.processors) - def forward(self, x, in_place: bool = True) -> Tensor: """Process the input tensor. From fa16cb20b4c53bcd9bb9db71721f1c114598bf4a Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 27 Sep 2024 12:39:01 +0000 Subject: [PATCH 04/10] transform loss nan mask in remapper --- src/anemoi/models/preprocessing/imputer.py | 23 ++++++++-------- src/anemoi/models/preprocessing/remapper.py | 29 +++++++++++++++++++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py index 1c30fc8b..78bf101e 100644 --- a/src/anemoi/models/preprocessing/imputer.py +++ b/src/anemoi/models/preprocessing/imputer.py @@ -43,7 +43,7 @@ def __init__( self.nan_locations = None # weight imputed values with zero in loss calculation - self.loss_weights_training = None + self.loss_mask_training = None def _validate_indices(self): assert len(self.index_training_input) == len(self.index_inference_input) <= len(self.replacement), ( @@ -110,24 +110,25 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: if not in_place: x = x.clone() - # Initilialize mask once + # Initilialize nan mask once if self.nan_locations is None: # The mask is only saved for the last two dimensions (grid, variable) idx = [slice(0, 1)] * 
(x.ndim 
- 2) + [slice(None), slice(None)] self.nan_locations = torch.isnan(x[idx].squeeze()) + # Initialize training loss mask to weigh imputed values with zeroes once + if self.loss_mask_training is None: + self.loss_mask_training = torch.ones( + (x.shape[-2], len(self.data_indices.model.output.name_to_index)), device=x.device + ) # shape (grid, n_outputs) + # for all variables that are imputed and part of the model output, set the loss weight to zero + for idx_src, idx_dst in zip(self.index_training_input, self.index_inference_output): + if idx_dst is not None: + self.loss_mask_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int() + # Choose correct index based on number of variables if x.shape[-1] == self.num_training_input_vars: index = self.index_training_input - if self.loss_weights_training is None: - self.loss_weights_training = torch.ones( - (x.shape[-2], len(self.data_indices.data.output.name_to_index)), device=x.device - ) # shape (grid, n_outputs) - # for all variables that are imputed and part of the output, set the loss weight to zero - for idx_src, idx_dst in zip(self.index_training_input, self.index_training_output): - if idx_dst is not None: - self.loss_weights_training[:, idx_dst] = (~self.nan_locations[:, idx_src]).int() - elif x.shape[-1] == self.num_inference_input_vars: index = self.index_inference_input else: diff --git a/src/anemoi/models/preprocessing/remapper.py b/src/anemoi/models/preprocessing/remapper.py index a79e2af8..a1cf95b2 100644 --- a/src/anemoi/models/preprocessing/remapper.py +++ b/src/anemoi/models/preprocessing/remapper.py @@ -223,6 +223,35 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: return x_remapped + def transform_loss_mask(self, mask: torch.Tensor) -> torch.Tensor: + """Remap the loss mask. 
+ + ``` + mask : torch.Tensor + Loss mask + ``` + """ + # use indices at model output level + index = self.index_inference_backmapped_output + indices_remapped = self.index_inference_output + indices_keep = self.indices_keep_inference_output + + # create new loss mask with target number of columns + mask_remapped = torch.zeros( + mask.shape[:-1] + (mask.shape[-1] + len(indices_remapped),), dtype=mask.dtype, device=mask.device + ) + + # copy loss mask for variables that are not remapped + mask_remapped[..., : len(indices_keep)] = mask[..., indices_keep] + + # remap loss mask for rest of variables + for idx_src, idx_dst in zip(indices_remapped, index): + if idx_dst is not None: + for jj, ii in enumerate(idx_dst): + mask_remapped[..., ii] = mask[..., idx_src] + + return mask_remapped + def inverse_transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: """Convert and remap the output tensor. From 3551992f2ae3ce7ca0096b893960a8b4492de348 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 27 Sep 2024 14:31:25 +0000 Subject: [PATCH 05/10] use internal model indices for bounding --- src/anemoi/models/models/encoder_processor_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/models/models/encoder_processor_decoder.py b/src/anemoi/models/models/encoder_processor_decoder.py index aa7e8bb6..c77db6e8 100644 --- a/src/anemoi/models/models/encoder_processor_decoder.py +++ b/src/anemoi/models/models/encoder_processor_decoder.py @@ -108,7 +108,7 @@ def __init__( # Instantiation of model output bounding functions (e.g., to ensure outputs like TP are positive definite) self.boundings = nn.ModuleList( [ - instantiate(cfg, name_to_index=self.data_indices.model.output.name_to_index) + instantiate(cfg, name_to_index=self.data_indices.internal_model.output.name_to_index) for cfg in getattr(model_config.model, "bounding", []) ] ) From 87647b7c8f4588623acd0ba431f37610d4236b7d Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 2 Oct 2024 
14:43:55 +0000 Subject: [PATCH 06/10] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4798ac9a..47303ce6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Keep it human-readable, your future self will thank you! - configurabilty of the dropout probability in the the MultiHeadSelfAttention module - Variable Bounding as configurable model layers [#13](https://github.com/ecmwf/anemoi-models/issues/13) - GraphTransformerMapperBlock chunking to reduce memory usage during inference [#46](https://github.com/ecmwf/anemoi-models/pull/46) +- Mask NaN values in training loss function [#271](https://github.com/ecmwf-lab/aifs-mono/issues/271) ### Changed - Bugfixes for CI From 29669a6b1980cf277d7a7e6733e358e8f2d016f6 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 13 Nov 2024 09:02:22 +0000 Subject: [PATCH 07/10] tests --- .../test_preprocessor_imputer.py | 20 ++++++++++ .../test_preprocessor_remapper.py | 40 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tests/preprocessing/test_preprocessor_imputer.py b/tests/preprocessing/test_preprocessor_imputer.py index a22e261c..5d1035eb 100644 --- a/tests/preprocessing/test_preprocessor_imputer.py +++ b/tests/preprocessing/test_preprocessor_imputer.py @@ -297,6 +297,26 @@ def test_mask_saving(imputer_fixture, data_fixture, request): assert torch.equal(imputer.nan_locations, expected_mask), "Mask not saved correctly after first run." 
+@pytest.mark.parametrize( + ("imputer_fixture", "data_fixture"), + [ + ("default_constant_imputer", "default_constant_data"), + ("non_default_constant_imputer", "non_default_constant_data"), + ("default_input_imputer", "default_input_data"), + ("non_default_input_imputer", "non_default_input_data"), + ], +) +def test_loss_nan_mask(imputer_fixture, data_fixture, request): + """Check that the imputer computes the NaN loss mask correctly.""" + x, _ = request.getfixturevalue(data_fixture) + expected = torch.tensor([[1.0, 1.0, 1.0], [1.0, 0.0, 1.0]]) # only prognostic and diagnostic variables + imputer = request.getfixturevalue(imputer_fixture) + imputer.transform(x) + assert torch.allclose( + imputer.loss_mask_training, expected + ), "Transform does not calculate NaN-mask for loss function scaling correctly." + + @pytest.mark.parametrize( ("imputer_fixture", "data_fixture"), [ diff --git a/tests/preprocessing/test_preprocessor_remapper.py b/tests/preprocessing/test_preprocessor_remapper.py index a0ece2a3..bbb3a168 100644 --- a/tests/preprocessing/test_preprocessor_remapper.py +++ b/tests/preprocessing/test_preprocessor_remapper.py @@ -8,11 +8,13 @@ # nor does it submit to any jurisdiction. 
+import numpy as np import pytest import torch from omegaconf import DictConfig from anemoi.models.data_indices.collection import IndexCollection +from anemoi.models.preprocessing.imputer import InputImputer from anemoi.models.preprocessing.remapper import Remapper @@ -41,6 +43,34 @@ def input_remapper(): return Remapper(config=config.data.remapper, data_indices=data_indices, statistics=statistics) +@pytest.fixture() +def input_imputer(): + config = DictConfig( + { + "diagnostics": {"log": {"code": {"level": "DEBUG"}}}, + "data": { + "remapper": { + "cos_sin": { + "d": ["cos_d", "sin_d"], + } + }, + "imputer": {"default": "none", "mean": ["y", "d"]}, + "forcing": ["z", "q"], + "diagnostic": ["other"], + "remapped": { + "d": ["cos_d", "sin_d"], + }, + }, + }, + ) + statistics = { + "mean": np.array([1.0, 2.0, 3.0, 4.5, 3.0, 1.0]), + } + name_to_index = {"x": 0, "y": 1, "z": 2, "q": 3, "d": 4, "other": 5} + data_indices = IndexCollection(config=config, name_to_index=name_to_index) + return InputImputer(config=config.data.imputer, data_indices=data_indices, statistics=statistics) + + def test_remap_not_inplace(input_remapper) -> None: x = torch.Tensor([[1.0, 2.0, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, 201.0, 10.0]]) input_remapper(x, in_place=False) @@ -66,3 +96,13 @@ def test_remap_inverse_transform(input_remapper) -> None: assert torch.allclose( input_remapper.inverse_transform(input_remapper.transform(x, in_place=False), in_place=False), x ) + + +def test_transform_loss_mask(input_imputer, input_remapper) -> None: + x = torch.Tensor([[1.0, np.nan, 3.0, 4.0, 150.0, 5.0], [6.0, 7.0, 8.0, 9.0, np.nan, 10.0]]) + expected_output = torch.Tensor([[1.0, 0.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 0.0, 0.0]]) + input_imputer.transform(x) + input_remapper.transform(x) + loss_mask_training = input_imputer.loss_mask_training + loss_mask_training = input_remapper.transform_loss_mask(loss_mask_training) + assert torch.allclose(loss_mask_training, expected_output) From 
94f0d52c5812feb8c9c86af302d9e7cc4e1da58c Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 13 Nov 2024 09:45:25 +0000 Subject: [PATCH 08/10] remove obsolete enumerate --- src/anemoi/models/preprocessing/remapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/models/preprocessing/remapper.py b/src/anemoi/models/preprocessing/remapper.py index 891056cb..7cb15a66 100644 --- a/src/anemoi/models/preprocessing/remapper.py +++ b/src/anemoi/models/preprocessing/remapper.py @@ -248,7 +248,7 @@ def transform_loss_mask(self, mask: torch.Tensor) -> torch.Tensor: # remap loss mask for rest of variables for idx_src, idx_dst in zip(indices_remapped, index): if idx_dst is not None: - for jj, ii in enumerate(idx_dst): + for ii in idx_dst: mask_remapped[..., ii] = mask[..., idx_src] return mask_remapped From fcc4bcd9e60433940fabdb18e4318afc49dd6826 Mon Sep 17 00:00:00 2001 From: Sara Hahner <44293258+sahahner@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:38:52 +0100 Subject: [PATCH 09/10] Update src/anemoi/models/preprocessing/imputer.py Co-authored-by: Harrison Cook --- src/anemoi/models/preprocessing/imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py index 950a5958..2bd1bed4 100644 --- a/src/anemoi/models/preprocessing/imputer.py +++ b/src/anemoi/models/preprocessing/imputer.py @@ -111,7 +111,7 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: if not in_place: x = x.clone() - # Initilialize nan mask once + # Initialize nan mask once if self.nan_locations is None: # The mask is only saved for the last two dimensions (grid, variable) idx = [slice(0, 1)] * (x.ndim - 2) + [slice(None), slice(None)] From 7aefe568d4666e67c33d28c6ef19bb412c61c9dd Mon Sep 17 00:00:00 2001 From: sahahner Date: Mon, 18 Nov 2024 14:11:07 +0000 Subject: [PATCH 10/10] nan mask and loss mask calculate together --- 
src/anemoi/models/preprocessing/imputer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/anemoi/models/preprocessing/imputer.py b/src/anemoi/models/preprocessing/imputer.py index 950a5958..fd1a7f5f 100644 --- a/src/anemoi/models/preprocessing/imputer.py +++ b/src/anemoi/models/preprocessing/imputer.py @@ -117,8 +117,7 @@ def transform(self, x: torch.Tensor, in_place: bool = True) -> torch.Tensor: idx = [slice(0, 1)] * (x.ndim - 2) + [slice(None), slice(None)] self.nan_locations = torch.isnan(x[idx].squeeze()) - # Initialize training loss mask to weigh imputed values with zeroes once - if self.loss_mask_training is None: + # Initialize training loss mask to weigh imputed values with zeroes once self.loss_mask_training = torch.ones( (x.shape[-2], len(self.data_indices.model.output.name_to_index)), device=x.device ) # shape (grid, n_outputs)