From 5b720930c7c71fd41f5539cfc79f80f1e7ad0e95 Mon Sep 17 00:00:00 2001
From: Daniel Huang
Date: Wed, 11 Dec 2024 14:59:17 -0800
Subject: [PATCH] Add arctic model support by adding w2 to all_reduce

Signed-off-by: Daniel Huang

---
 deepspeed/module_inject/auto_tp.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/deepspeed/module_inject/auto_tp.py b/deepspeed/module_inject/auto_tp.py
index 221d490a37d2..5441000e581d 100755
--- a/deepspeed/module_inject/auto_tp.py
+++ b/deepspeed/module_inject/auto_tp.py
@@ -346,11 +346,15 @@ def _replace(self, child, name, conv_linear_layer):
                 weight, bias = shard_value_with_share_qk(child.weight.data, child.bias, dist.get_rank(),
                                                          dist.get_world_size(), False)
                 return LinearAllreduce(weight, bias, self.mp_group)
+        # For Arctic model, bypass to all_reduce replacement for w2 weights
+        arctic_w2_all_reduce_linear = False
+        if 'Arctic' in str(self.module) and 'w2' in name:
+            arctic_w2_all_reduce_linear = True
         # For MLP including chunk layer.
         if 'gate_up_proj' in name or ('dense_h_to_4h' in name and 'GLM' in str(self.module)):
             weight, bias = shard_chunk_mlp(child.weight.data, child.bias, dist.get_rank(), dist.get_world_size())
             return LinearLayer(weight=weight, bias=bias)
-        if name in self.all_reduce_linears:
+        if name in self.all_reduce_linears or arctic_w2_all_reduce_linear:
             # if conv_linear_layer [weight_shape[1], weight_shape[0] // mp_size]
             # else [weight_shape[0], weight_shape[1] // mp_size]
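
Note for reviewers: the sketch below is a minimal, standalone illustration of the routing rule this patch adds; the helper name and layer names are hypothetical, not DeepSpeed API. Arctic's MoE expert `w2` projection is routed to the all-reduce (LinearAllreduce) path, joining the layers already listed in all_reduce_linears, by substring-matching on str(self.module) and the layer name, just as the patch does.

def needs_all_reduce(module_repr, layer_name, all_reduce_linears):
    # Mirror of the patch's rule: Arctic 'w2' expert projections take the
    # all-reduce path, in addition to any layer listed in all_reduce_linears.
    arctic_w2 = 'Arctic' in module_repr and 'w2' in layer_name
    return layer_name in all_reduce_linears or arctic_w2

# Hypothetical module representations and layer names, for illustration only.
print(needs_all_reduce('ArcticForCausalLM(...)',
                       'block_sparse_moe.experts.0.w2',
                       {'o_proj', 'down_proj'}))   # True: Arctic w2 is all-reduced
print(needs_all_reduce('LlamaForCausalLM(...)',
                       'mlp.gate_proj',
                       {'o_proj', 'down_proj'}))   # False: column-parallel, no all-reduce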