[Misc] Remove duplicated DeepSeek V2/V3 model definition (#12793)

449d1bce · Michael Goin · GitHub · 1a6fcad4 · 449d1bce · 449d1bce
Unverified Commit 449d1bce authored Feb 06, 2025 by Michael Goin Committed by GitHub Feb 05, 2025
4 changed files
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -754,7 +754,6 @@ class ModelConfig:
    @property
    def is_deepseek_mla(self) -> bool:
-        # TODO add deepseek_v3
        return (hasattr(self.hf_text_config, "model_type")) \
                and (self.hf_text_config.model_type in \
                    ('deepseek_v2', 'deepseek_v3'))\

--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -21,7 +21,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Inference-only DeepseekV2 model."""
+"""Inference-only DeepseekV2/DeepseekV3 model."""
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
 import torch
@@ -115,7 +115,19 @@ class DeepseekV2MoE(nn.Module):
            raise ValueError(f"Unsupported activation: {config.hidden_act}. "
                             "Only silu is supported for now.")
-        self.experts = FusedMoE(num_experts=config.n_routed_experts,
+        self.gate = ReplicatedLinear(config.hidden_size,
+                                     config.n_routed_experts,
+                                     bias=False,
+                                     quant_config=None,
+                                     prefix=f"{prefix}.gate")
+        if config.topk_method == "noaux_tc":
+            self.gate.e_score_correction_bias = nn.Parameter(
+                torch.empty(config.n_routed_experts))
+        else:
+            self.gate.e_score_correction_bias = None
+        self.experts = FusedMoE(
+            num_experts=config.n_routed_experts,
            top_k=config.num_experts_per_tok,
            hidden_size=config.hidden_size,
            intermediate_size=config.moe_intermediate_size,
@@ -125,13 +137,10 @@ class DeepseekV2MoE(nn.Module):
            use_grouped_topk=True,
            num_expert_group=config.n_group,
            topk_group=config.topk_group,
-                                prefix=f"{prefix}.experts")
+            prefix=f"{prefix}.experts",
+            scoring_func=config.scoring_func,
+            e_score_correction_bias=self.gate.e_score_correction_bias)
-        self.gate = ReplicatedLinear(config.hidden_size,
-                                     config.n_routed_experts,
-                                     bias=False,
-                                     quant_config=None,
-                                     prefix=f"{prefix}.gate")
        if config.n_shared_experts is not None:
            intermediate_size = (config.moe_intermediate_size *
                                 config.n_shared_experts)
@@ -732,6 +741,15 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
        for name, loaded_weight in weights:
            if "rotary_emb.inv_freq" in name:
                continue
+            # TODO(simon): support nextn predict layers
+            if hasattr(self.config, "num_nextn_predict_layers"
+                       ) and self.config.num_nextn_predict_layers > 0:
+                assert self.config.num_nextn_predict_layers == 1
+                layer_idx = self.config.num_hidden_layers
+                if name.startswith(f"model.layers.{layer_idx}"):
+                    continue
            for (param_name, weight_name, shard_id) in stacked_params_mapping:
                # Skip non-stacked layers and experts (experts handled below).
                if weight_name not in name:
@@ -793,3 +811,7 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
                    weight_loader(param, loaded_weight)
            loaded_params.add(name)
        return loaded_params
+class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
+    pass
--- a/vllm/model_executor/models/deepseek_v3.py
+++ b/vllm/model_executor/models/deepseek_v3.py
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -45,7 +45,7 @@ _TEXT_GENERATION_MODELS = {
    "DeciLMForCausalLM": ("decilm", "DeciLMForCausalLM"),
    "DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"),
    "DeepseekV2ForCausalLM": ("deepseek_v2", "DeepseekV2ForCausalLM"),
-    "DeepseekV3ForCausalLM": ("deepseek_v3", "DeepseekV3ForCausalLM"),
+    "DeepseekV3ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),
    "ExaoneForCausalLM": ("exaone", "ExaoneForCausalLM"),
    "FalconForCausalLM": ("falcon", "FalconForCausalLM"),
    "Fairseq2LlamaForCausalLM": ("fairseq2_llama", "Fairseq2LlamaForCausalLM"),