[Bugfix] Add missing `packed_modules_mapping` to `DeepseekV2ForCausalLM` (#22352)

Signed-off-by: Felix Marty <Felix.Marty@amd.com>

[Bugfix] Add missing `packed_modules_mapping` to `DeepseekV2ForCausalLM` (#22352)
Signed-off-by: Felix Marty <Felix.Marty@amd.com>
7e0b1218 · fxmarty-amd · GitHub · 766bc816 · 7e0b1218
Unverified Commit 7e0b1218 authored Aug 07, 2025 by fxmarty-amd Committed by GitHub Aug 07, 2025
Show whitespace changes
Inline Side-by-side

Showing with 16 additions and 0 deletions

vllm/model_executor/models/deepseek_v2.py vllm/model_executor/models/deepseek_v2.py +16 -0

No files found.
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -726,6 +726,9 @@ class DeepseekV2Model(nn.Module):


 class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
+    packed_modules_mapping = {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()
@@ -733,6 +736,19 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
        quant_config = vllm_config.quant_config
        self.config = config
        self.quant_config = quant_config
+
+        # `packed_modules_mapping` must be updated before
+        # DeepseekV2Model is initialized: the mapping is passed in place
+        # to the quantization config init and may be used to select the
+        # quant_method for relevant layers during initialization.
+        self.fuse_qkv_a_proj = hasattr(
+            config, "q_lora_rank") and config.q_lora_rank is not None
+        if self.fuse_qkv_a_proj:
+            self.packed_modules_mapping["fused_qkv_a_proj"] = [
+                "q_a_proj",
+                "kv_a_proj_with_mqa",
+            ]
+
        self.model = DeepseekV2Model(vllm_config=vllm_config,
                                     prefix=maybe_prefix(prefix, "model"))
        if get_pp_group().is_last_rank: