"git@developer.sourcefind.cn:change/sglang.git" did not exist on "8e10fec9a8c80216307adb64241cdb7a5fa48d36"
Unverified Commit fde2decf authored by Zheng Wengang's avatar Zheng Wengang Committed by GitHub
Browse files

[BugFix][Qwen3-VL]: add metadata for video in qwen3-vl (#11377)

parent 9792b9d7
......@@ -214,7 +214,14 @@ async def preprocess_video(
interpolation=InterpolationMode.BICUBIC,
antialias=True,
).float()
return video
video_metadata = {
"fps": video_fps,
"duration": total_frames / video_fps,
"total_num_frames": total_frames,
"frames_indices": idx,
"video_backend": "torchvision",
}
return video, video_metadata
# Compatible with Qwen-VL & Qwen-Omni Series
......@@ -279,14 +286,25 @@ class QwenVLImageProcessor(SGLangBaseProcessor):
resize_tasks = [resize_image_async(image) for image in base_output.images]
base_output.images = await asyncio.gather(*resize_tasks)
video_metadata = None
if base_output.videos:
base_output.videos = [
await preprocess_video(video) for video in base_output.videos
]
mm_items, input_ids, ret = self.process_and_combine_mm_data(
base_output, self.mm_tokens
)
video_results = await asyncio.gather(
*[preprocess_video(video) for video in base_output.videos]
)
base_output.videos, video_metadata = map(list, zip(*video_results))
# NOTE: for Qwen3-VL, video_metadata must be passed in and do_sample_frames set to False, because frame sampling has already been done in preprocess_video
if self.hf_config.model_type in ("qwen3_vl", "qwen3_vl_moe"):
mm_items, input_ids, ret = self.process_and_combine_mm_data(
base_output,
self.mm_tokens,
video_metadata=video_metadata,
do_sample_frames=False,
)
else:
mm_items, input_ids, ret = self.process_and_combine_mm_data(
base_output, self.mm_tokens
)
audio_feature_lengths = None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment