From ef90256d6dcedb4946953ca9cbc779550619ea60 Mon Sep 17 00:00:00 2001 From: "Lai, Yejing" Date: Mon, 15 Apr 2024 01:56:21 -0700 Subject: [PATCH 1/2] enable phi2 autotp --- deepspeed/module_inject/auto_tp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deepspeed/module_inject/auto_tp.py b/deepspeed/module_inject/auto_tp.py index 88f7086518e8..47ea224bb5fe 100644 --- a/deepspeed/module_inject/auto_tp.py +++ b/deepspeed/module_inject/auto_tp.py @@ -306,6 +306,8 @@ def tp_parser(model): # Mixtral-7x8b used w2*act(w1*w3) linear. need to replace w2 to linearallreduce. elif 'w2' in layer and 'Mixtral' in str(type(module)): gem_list = gem_list + [layer] + elif "self_attn.dense" in layer: + gem_list = gem_list + [layer] layer_list = [] if gem_list != []: From b45caa9478191b9ec86ad79ecc94d12865613fa2 Mon Sep 17 00:00:00 2001 From: "Lai, Yejing" Date: Thu, 2 May 2024 08:47:40 -0700 Subject: [PATCH 2/2] add phi policy --- deepspeed/module_inject/auto_tp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/module_inject/auto_tp.py b/deepspeed/module_inject/auto_tp.py index 47ea224bb5fe..2c867d101bde 100644 --- a/deepspeed/module_inject/auto_tp.py +++ b/deepspeed/module_inject/auto_tp.py @@ -306,7 +306,7 @@ def tp_parser(model): # Mixtral-7x8b used w2*act(w1*w3) linear. need to replace w2 to linearallreduce. elif 'w2' in layer and 'Mixtral' in str(type(module)): gem_list = gem_list + [layer] - elif "self_attn.dense" in layer: + elif "self_attn.dense" in layer and "Phi" in str(type(module)): gem_list = gem_list + [layer] layer_list = []