From 880417e8c43bbd33363bbe0a1d3eafd0901c15de Mon Sep 17 00:00:00 2001
From: Connor Holmes
Date: Sat, 16 Dec 2023 02:33:39 +0000
Subject: [PATCH] Clear output

---
 .../model_implementations/common_parameters/moe_parameters.py | 2 +-
 .../inference/v2/model_implementations/mixtral/container.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepspeed/inference/v2/model_implementations/common_parameters/moe_parameters.py b/deepspeed/inference/v2/model_implementations/common_parameters/moe_parameters.py
index d1cc30c50237..8ababf567ba9 100644
--- a/deepspeed/inference/v2/model_implementations/common_parameters/moe_parameters.py
+++ b/deepspeed/inference/v2/model_implementations/common_parameters/moe_parameters.py
@@ -75,4 +75,4 @@ def finalize(self) -> torch.Tensor:
             transposed_experts.append(fused_expert)
 
         stacked_experts = torch.stack(transposed_experts, dim=0)
-        return self.inference_model.transform_moe_mlp_2_param(stacked_experts)
+        return self.inference_model.transform_moe_mlp_1_param(stacked_experts)

diff --git a/deepspeed/inference/v2/model_implementations/mixtral/container.py b/deepspeed/inference/v2/model_implementations/mixtral/container.py
index 51bcbf180cfd..6ec4a0552b8f 100644
--- a/deepspeed/inference/v2/model_implementations/mixtral/container.py
+++ b/deepspeed/inference/v2/model_implementations/mixtral/container.py
@@ -36,7 +36,7 @@ class MixtralTransformerContainer(LayerContainer):
 
 class MixtralNonTransformerContainer(LayerContainer):
 
     word_emb: EmbeddingParameter
-    word_unembed: EmbeddingParameter
+    word_unembed: UnembedParameter
     final_norm: NormParameter
 
     PARAM_MAPPING = {