From a12b4f28b5504c47f592a03fd01faabd64020659 Mon Sep 17 00:00:00 2001 From: Cathal OBrien Date: Thu, 21 Nov 2024 12:41:03 +0000 Subject: [PATCH 1/3] reduce decoder mem usage Declare an empty accumulator tensor outside the for loop. The old approach of keeping both out and out1 resulted in two copies of the array, which increased memory use. At 9 km this added 6 GB to peak memory usage. --- src/anemoi/models/layers/block.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/anemoi/models/layers/block.py b/src/anemoi/models/layers/block.py index 60446d6..0d7df22 100644 --- a/src/anemoi/models/layers/block.py +++ b/src/anemoi/models/layers/block.py @@ -512,8 +512,9 @@ def forward( edge_attr_list, edge_index_list = sort_edges_1hop_chunks( num_nodes=size, edge_attr=edges, edge_index=edge_index, num_chunks=num_chunks ) + out=torch.zeros((x[1].shape[0], self.num_heads, self.out_channels_conv), device=x[1].device) for i in range(num_chunks): - out1 = self.conv( + out = self.conv( query=query, key=key, value=value, @@ -521,9 +522,6 @@ def forward( edge_index=edge_index_list[i], size=size, ) - if i == 0: - out = torch.zeros_like(out1, device=out1.device) - out = out + out1 else: out = self.conv(query=query, key=key, value=value, edge_attr=edges, edge_index=edge_index, size=size) From f7ce0937171a581dda473b3cdfc7c6e37e9f0b35 Mon Sep 17 00:00:00 2001 From: Cathal OBrien Date: Thu, 21 Nov 2024 12:42:54 +0000 Subject: [PATCH 2/3] typo --- src/anemoi/models/layers/block.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/models/layers/block.py b/src/anemoi/models/layers/block.py index 0d7df22..c21ff19 100644 --- a/src/anemoi/models/layers/block.py +++ b/src/anemoi/models/layers/block.py @@ -514,7 +514,7 @@ def forward( ) out=torch.zeros((x[1].shape[0], self.num_heads, self.out_channels_conv), device=x[1].device) for i in range(num_chunks): - out = self.conv( + out += self.conv( query=query, key=key, value=value, From 
a81ab4e74249e97b3a180767591a4e9779fe457e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 12:59:41 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/anemoi/models/layers/block.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/models/layers/block.py b/src/anemoi/models/layers/block.py index c21ff19..72e487d 100644 --- a/src/anemoi/models/layers/block.py +++ b/src/anemoi/models/layers/block.py @@ -512,7 +512,7 @@ def forward( edge_attr_list, edge_index_list = sort_edges_1hop_chunks( num_nodes=size, edge_attr=edges, edge_index=edge_index, num_chunks=num_chunks ) - out=torch.zeros((x[1].shape[0], self.num_heads, self.out_channels_conv), device=x[1].device) + out = torch.zeros((x[1].shape[0], self.num_heads, self.out_channels_conv), device=x[1].device) for i in range(num_chunks): out += self.conv( query=query,