Unverified commit 17b4c668, authored by Isotr0py and committed by GitHub
Browse files

[Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 3c2b2cce
...@@ -82,7 +82,7 @@ from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model ...@@ -82,7 +82,7 @@ from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model
logger = init_logger(__name__) logger = init_logger(__name__)
# For profile run # For profile run
_MAX_FRAMES_PER_VIDEO = 600 _MAX_FRAMES_PER_VIDEO = 32
# === Vision Inputs === # # === Vision Inputs === #
......
...@@ -715,6 +715,18 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]): ...@@ -715,6 +715,18 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
video_items.append(video_item) video_items.append(video_item)
return video_items return video_items
def get_dummy_processor_inputs(self, seq_len, mm_counts):
    """Build the parent's dummy processor inputs, then disable resizing.

    The inputs are produced by the superclass implementation from
    ``seq_len`` and ``mm_counts``; afterwards ``hf_processor_mm_kwargs``
    is replaced with ``{"do_resize": False}`` and the same object is
    returned.
    """
    dummy_inputs = super().get_dummy_processor_inputs(seq_len, mm_counts)

    # HACK(Isotr0py): do_resize is forced to False so that Qwen2-VL's
    # profiling logic can be reused as-is. This will be problematic once
    # mm profiling becomes configurable.
    # TODO(Isotr0py): Switch to the implementation in
    # https://github.com/vllm-project/vllm/pull/25557
    # after supporting configurable mm profiling.
    resize_disabled_kwargs = {"do_resize": False}
    dummy_inputs.hf_processor_mm_kwargs = resize_disabled_kwargs
    return dummy_inputs
class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo] class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
): ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment