From 5b720930c7c71fd41f5539cfc79f80f1e7ad0e95 Mon Sep 17 00:00:00 2001
From: Daniel Huang
Date: Wed, 11 Dec 2024 14:59:17 -0800
Subject: [PATCH] Add arctic model support by adding w2 to all_reduce

Signed-off-by: Daniel Huang

---
 deepspeed/module_inject/auto_tp.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/deepspeed/module_inject/auto_tp.py b/deepspeed/module_inject/auto_tp.py
index 221d490a37d2..5441000e581d 100755
--- a/deepspeed/module_inject/auto_tp.py
+++ b/deepspeed/module_inject/auto_tp.py
@@ -346,11 +346,15 @@ def _replace(self, child, name, conv_linear_layer):
                 weight, bias = shard_value_with_share_qk(child.weight.data, child.bias, dist.get_rank(),
                                                          dist.get_world_size(), False)
                 return LinearAllreduce(weight, bias, self.mp_group)
+        # For Arctic model, bypass to all_reduce replacement for w2 weights
+        arctic_w2_all_reduce_linear = False
+        if 'Arctic' in str(self.module) and 'w2' in name:
+            arctic_w2_all_reduce_linear = True
         # For MLP including chunk layer.
         if 'gate_up_proj' in name or ('dense_h_to_4h' in name and 'GLM' in str(self.module)):
             weight, bias = shard_chunk_mlp(child.weight.data, child.bias, dist.get_rank(), dist.get_world_size())
             return LinearLayer(weight=weight, bias=bias)
-        if name in self.all_reduce_linears:
+        if name in self.all_reduce_linears or arctic_w2_all_reduce_linear:
             # if conv_linear_layer [weight_shape[1], weight_shape[0] // mp_size]
             # else [weight_shape[0], weight_shape[1] // mp_size]
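
Note for reviewers: the sketch below is a minimal, standalone illustration of the routing rule this patch adds; the helper name and layer names are hypothetical, not DeepSpeed API. Arctic's MoE expert `w2` projection is routed to the all-reduce (LinearAllreduce) path, joining the layers already listed in all_reduce_linears, by substring-matching on str(self.module) and the layer name, just as the patch does.

def needs_all_reduce(module_repr, layer_name, all_reduce_linears):
    # Mirror of the patch's rule: Arctic 'w2' expert projections take the
    # all-reduce path, in addition to any layer listed in all_reduce_linears.
    arctic_w2 = 'Arctic' in module_repr and 'w2' in layer_name
    return layer_name in all_reduce_linears or arctic_w2

# Hypothetical module representations and layer names, for illustration only.
print(needs_all_reduce('ArcticForCausalLM(...)',
                       'block_sparse_moe.experts.0.w2',
                       {'o_proj', 'down_proj'}))   # True: Arctic w2 is all-reduced
print(needs_all_reduce('LlamaForCausalLM(...)',
                       'mlp.gate_proj',
                       {'o_proj', 'down_proj'}))   # False: column-parallel, no all-reduce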