[Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
17b4c668 · Isotr0py · GitHub · 3c2b2cce · 17b4c668 · 17b4c668
Commit 17b4c668 (unverified) — authored by Isotr0py on Sep 25, 2025; committed by GitHub on Sep 25, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 13 additions and 1 deletion

vllm/model_executor/models/qwen2_vl.py vllm/model_executor/models/qwen2_vl.py +1 -1

vllm/model_executor/models/qwen3_vl.py vllm/model_executor/models/qwen3_vl.py +12 -0

No files found.
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -82,7 +82,7 @@ from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model
 logger = init_logger(__name__)

 # For profile run
-_MAX_FRAMES_PER_VIDEO = 600
+_MAX_FRAMES_PER_VIDEO = 32

 # === Vision Inputs === #


--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -715,6 +715,18 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
            video_items.append(video_item)
        return video_items

+    def get_dummy_processor_inputs(self, seq_len, mm_counts):
+        processor_inputs = super().get_dummy_processor_inputs(
+            seq_len, mm_counts)
+        # HACK(Isotr0py): We set do_resize to False here to reuse Qwen2-VL's
+        # profiling logic; this override will be problematic once
+        # multimodal (mm) profiling becomes configurable.
+        # TODO(Isotr0py): Switch to the implementation in
+        # https://github.com/vllm-project/vllm/pull/25557
+        # after supporting configurable mm profiling.
+        processor_inputs.hf_processor_mm_kwargs = {"do_resize": False}
+        return processor_inputs
+

 class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
                                 ):