From 3229f4c99ae5352cb1d2d79013271e4d2a4b7cc3 Mon Sep 17 00:00:00 2001 From: fc Date: Fri, 10 May 2024 10:16:31 +0800 Subject: [PATCH] [FIX] fix deadlock in PipeEngine._exec_recv_grads --- deepspeed/runtime/pipe/engine.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/deepspeed/runtime/pipe/engine.py b/deepspeed/runtime/pipe/engine.py index be8fe1a368c6f..7bb6f9a5018d6 100644 --- a/deepspeed/runtime/pipe/engine.py +++ b/deepspeed/runtime/pipe/engine.py @@ -1204,9 +1204,7 @@ def _exec_recv_grads(self, buffer_id): # branches on is_grad_partitioned so we don't filter out the # metadata tensor. if self.is_grad_partitioned: - sizes_and_dtypes = [(list(t.size()), t.dtype) - for t in outputs[:2]] + [(list(t.size()), t.dtype) - for t in outputs[2:] if t.is_floating_point()] + sizes_and_dtypes = [(list(t.size()), t.dtype) for t in outputs[:2]] else: sizes_and_dtypes = [(list(t.size()), t.dtype) for t in outputs if t.is_floating_point()] self.grad_layer = self._allocate_buffers(sizes_and_dtypes, num_buffers=1)[0]