[Model]Improve Qwen3VLMoeForConditionalGeneration packed_modules_mapping (#27096)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

[Model]Improve Qwen3VLMoeForConditionalGeneration packed_modules_mapping (#27096)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
daec4d26 · Jee Jee Li · GitHub · 6c9fdbf7 · daec4d26
Unverified Commit daec4d26 authored Oct 17, 2025 by Jee Jee Li Committed by GitHub Oct 17, 2025
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 0 deletions

vllm/model_executor/models/qwen3_vl_moe.py vllm/model_executor/models/qwen3_vl_moe.py +13 -0

No files found.
--- a/vllm/model_executor/models/qwen3_vl_moe.py
+++ b/vllm/model_executor/models/qwen3_vl_moe.py
@@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
    dummy_inputs=Qwen3VLDummyInputsBuilder,
 )
 class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+    }
+
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super(Qwen3VLForConditionalGeneration, self).__init__()
        config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
@@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
        self.language_model = Qwen3MoeLLMForCausalLM(
            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
        )
+        # The language model determines whether the gate_up_proj mapping is
+        # included, so merge its mapping into an instance-level copy of ours.
+        self.packed_modules_mapping = (
+            self.packed_modules_mapping | self.language_model.packed_modules_mapping
+        )

        self.make_empty_intermediate_tensors = (
            self.language_model.make_empty_intermediate_tensors