[Bugfix] Fix Qwen2.5-VL quantized model weights loading (#23512)

Signed-off-by: Zifei Tong <zifeitong@gmail.com>

[Bugfix] Fix Qwen2.5-VL quantized model weights loading (#23512)
Signed-off-by: Zifei Tong <zifeitong@gmail.com>
a71e4765 · zifeitong · GitHub · 39971db3 · a71e4765
Unverified commit a71e4765, authored Aug 24, 2025 by zifeitong; committed by GitHub on Aug 25, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 1 deletion

vllm/model_executor/models/qwen2_5_vl.py (+5 -1)

No files found.
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -135,7 +135,7 @@ class Qwen2_5_VLVideoPixelInputs(TypedDict):

    second_per_grid_ts: torch.Tensor
    """
-    The video time interval (in seconds) for each grid along the temporal 
+    The video time interval (in seconds) for each grid along the temporal
    dimension in the 3D position IDs. Returned when `videos` is not `None`.
    """

@@ -852,6 +852,10 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
                                         SupportsLoRA, SupportsPP,
                                         SupportsQuant):

+    packed_modules_mapping = {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
    # To ensure correct weight loading and mapping.
    hf_to_vllm_mapper = WeightsMapper(
        orig_to_new_prefix={