Add support of Phi-2 model to DeepSpeed-FastGen
arashb committed Dec 18, 2023
1 parent 880417e commit a42e869
Showing 13 changed files with 532 additions and 90 deletions.
3 changes: 3 additions & 0 deletions deepspeed/inference/v2/engine_factory.py
@@ -19,6 +19,7 @@
MistralPolicy,
MixtralPolicy,
FalconPolicy,
PhiPolicy,
)
from .model_implementations.inference_policy_base import POLICIES, InferenceV2Policy
from .model_implementations.flat_model_helpers import make_metadata_filename, ModelMetadata
@@ -114,6 +115,8 @@ def build_hf_engine(path: str,
policy = MixtralPolicy(model_config, checkpoint_engine=checkpoint_engine)
elif model_config.model_type == "falcon":
policy = FalconPolicy(model_config, checkpoint_engine=checkpoint_engine)
elif model_config.model_type == "phi-msft":
policy = PhiPolicy(model_config, checkpoint_engine=checkpoint_engine)
else:
raise ValueError(f"Unsupported model type {model_config.model_type}")

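With the new `phi-msft` branch in `build_hf_engine`, a Phi-2 checkpoint is served through DeepSpeed-FastGen the same way as the other supported architectures. A minimal usage sketch via DeepSpeed-MII (the prompt and `max_new_tokens` value are illustrative; this assumes DeepSpeed-MII is installed and the `microsoft/phi-2` checkpoint is reachable):

# Sketch: MII's pipeline builds the v2 inference engine, which dispatches on
# model_config.model_type == "phi-msft" to the PhiPolicy registered above.
import mii

pipe = mii.pipeline("microsoft/phi-2")
response = pipe(["DeepSpeed is"], max_new_tokens=64)
print(response)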

Large diffs are not rendered by default.

@@ -18,7 +18,7 @@ class BlockedRotaryEmbeddings(DSKernelBase):
"""

supported_dtypes = [DtypeEnum.fp16, DtypeEnum.bf16]
supported_head_sizes = [64, 128]
supported_head_sizes = [64, 80, 128]
supported_q_ratios = [1, 2, 4, 5, 8, 16, 29, 35, 36, 71]

def __init__(self,
@@ -23,7 +23,7 @@ class BlockedTrainedRotaryEmbeddings(DSKernelBase):
"""

supported_dtypes = [DtypeEnum.fp16, DtypeEnum.bf16]
supported_head_sizes = [64, 128]
supported_head_sizes = [64, 80, 128]
supported_q_ratios = [1, 2, 4, 5, 8]

def __init__(self, head_size: int, n_q_heads: int, n_kv_heads: int, dtype: torch.dtype) -> None:
@@ -23,7 +23,7 @@ class LinearBlockedKVCopy(DSKernelBase):
"""

supported_dtypes = [DtypeEnum.fp16, DtypeEnum.bf16]
supported_head_sizes = [64, 128]
supported_head_sizes = [64, 80, 128]
supported_q_ratios = [1, 2, 4, 5, 8]

def __init__(self, head_size: int, n_q_heads: int, n_kv_heads: int, dtype: torch.dtype) -> None:
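All three blocked rotary-embedding and KV-copy kernels add 80 to `supported_head_sizes` because Phi-2 splits its hidden width of 2560 across 32 attention heads, giving 2560 / 32 = 80 elements per head, a size the kernels previously rejected. A quick arithmetic check (the hidden width appears in the `phi_containers.py` module dump below; the head count of 32 is the published Phi-2 configuration; illustrative only):

# Why head size 80: Phi-2's per-head dimension, checked against the kernels'
# supported_head_sizes lists shown above.
hidden_size = 2560  # n_embd in the Phi-2 config
n_heads = 32        # attention heads per layer
head_size = hidden_size // n_heads
assert head_size == 80
assert head_size in [64, 80, 128]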
1 change: 1 addition & 0 deletions deepspeed/inference/v2/model_implementations/__init__.py
@@ -14,3 +14,4 @@
from .mistral import *
from .mixtral import *
from .falcon import *
from .phi import *
6 changes: 6 additions & 0 deletions deepspeed/inference/v2/model_implementations/phi/__init__.py
@@ -0,0 +1,6 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from .phi_policy import PhiPolicy
98 changes: 98 additions & 0 deletions deepspeed/inference/v2/model_implementations/phi/phi_containers.py
@@ -0,0 +1,98 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Create a container object to save model-specific tensors using the policy file above.

from ..common_parameters import *
from ..layer_container_base import LayerContainer
'''
# HF Phi-2 model looks like this:
PhiForCausalLM(
(transformer): PhiModel(
(embd): Embedding(
(wte): Embedding(51200, 2560)
(drop): Dropout(p=0.0, inplace=False)
)
(h): ModuleList(
(0-31): 32 x ParallelBlock(
(ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
(resid_dropout): Dropout(p=0.1, inplace=False)
(mixer): MHA(
(rotary_emb): RotaryEmbedding()
(Wqkv): Linear(in_features=2560, out_features=7680, bias=True)
(out_proj): Linear(in_features=2560, out_features=2560, bias=True)
(inner_attn): SelfAttention(
(drop): Dropout(p=0.0, inplace=False)
)
(inner_cross_attn): CrossAttention(
(drop): Dropout(p=0.0, inplace=False)
)
)
(mlp): MLP(
(fc1): Linear(in_features=2560, out_features=10240, bias=True)
(fc2): Linear(in_features=10240, out_features=2560, bias=True)
(act): NewGELUActivation()
)
)
)
)
(lm_head): CausalLMHead(
(ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
(linear): Linear(in_features=2560, out_features=51200, bias=True)
)
(loss): CausalLMLoss(
(loss_fct): CrossEntropyLoss()
)
)
'''


class PhiTransformerContainer(LayerContainer):
"""
Transformer layer container for the Phi model.
"""
qkv_w: FusedQKVParameter
qkv_b: FusedQKVParameter
attn_out_w: AttentionOutputParameter
attn_out_b: AttentionOutputParameter
mlp_1_w: MLP1Parameter
mlp_1_b: MLP1Parameter
mlp_2_w: MLP2Parameter
mlp_2_b: MLP2Parameter
ln_gamma: NormParameter
ln_beta: NormParameter

PARAM_MAPPING = {
"mixer.Wqkv.weight": "qkv_w.params",
"mixer.Wqkv.bias": "qkv_b.params",
"mixer.out_proj.weight": "attn_out_w.params",
"mixer.out_proj.bias": "attn_out_b.params",
"mlp.fc1.weight": "mlp_1_w.params",
"mlp.fc1.bias": "mlp_1_b.params",
"mlp.fc2.weight": "mlp_2_w.params",
"mlp.fc2.bias": "mlp_2_b.params",
"ln.weight": "ln_gamma.params",
"ln.bias": "ln_beta.params",
}


class PhiNonTransformerContainer(LayerContainer):
"""
Non-Transformer layer container for the Phi model.
"""
word_emb: EmbeddingParameter
word_unembed_w: UnembedParameter
word_unembed_b: UnembedParameter
final_norm_gamma: NormParameter
final_norm_beta: NormParameter

PARAM_MAPPING = {
"transformer.embd.wte.weight": "word_emb.params",
"lm_head.ln.weight": "final_norm_gamma.params",
"lm_head.ln.bias": "final_norm_beta.params",
"lm_head.linear.weight": "word_unembed_w.params",
"lm_head.linear.bias": "word_unembed_b.params",
}
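The `PARAM_MAPPING` tables tie parameter names in the Hugging Face Phi-2 checkpoint to the container attributes that the inference engine populates. A rough, hypothetical illustration of the lookup pattern (the real resolution logic lives in the shared `LayerContainer` / policy base classes, not in this file; the checkpoint key format follows the module dump above):

# Hypothetical stand-in for the loader's name resolution, illustration only.
transformer_mapping = {
    "mixer.Wqkv.weight": "qkv_w.params",
    "mixer.out_proj.weight": "attn_out_w.params",
    "mlp.fc1.weight": "mlp_1_w.params",
    "ln.weight": "ln_gamma.params",
}

def route_param(checkpoint_name: str):
    # Keys look like "transformer.h.<layer>.mixer.Wqkv.weight": strip the
    # per-layer prefix, then look up the in-layer name in the mapping.
    prefix = "transformer.h."
    if not checkpoint_name.startswith(prefix):
        return None
    layer_idx, local_name = checkpoint_name[len(prefix):].split(".", 1)
    return int(layer_idx), transformer_mapping.get(local_name)

print(route_param("transformer.h.0.mixer.Wqkv.weight"))  # -> (0, 'qkv_w.params')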