Commit 9f1c6422 (unverified), authored by double7, committed by GitHub
Browse files

[Bugfix] fix Qwen2.5-Omni processor output mapping (#23058)


Signed-off-by: double7 <33449816+DoubleVII@users.noreply.github.com>
Co-authored-by: 杨森 <yangsen.double7@bytedance.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 7be3a59d
...@@ -88,6 +88,11 @@ def _qwen2_5_omni_thinker_field_config(hf_inputs: Mapping[str, torch.Tensor]): ...@@ -88,6 +88,11 @@ def _qwen2_5_omni_thinker_field_config(hf_inputs: Mapping[str, torch.Tensor]):
video_grid_thw = hf_inputs.get("video_grid_thw", torch.empty((0, 3))) video_grid_thw = hf_inputs.get("video_grid_thw", torch.empty((0, 3)))
video_grid_sizes = video_grid_thw.prod(-1) video_grid_sizes = video_grid_thw.prod(-1)
# vLLM uses `second_per_grid_ts` to compute the multimodal rotary embedding,
# so expose the `video_second_per_grid` input under that key as well.
video_second_per_grid = hf_inputs.get("video_second_per_grid", None)
if video_second_per_grid is not None:
hf_inputs["second_per_grid_ts"] = video_second_per_grid
return dict( return dict(
input_audio_features=MultiModalFieldConfig.flat_from_sizes( input_audio_features=MultiModalFieldConfig.flat_from_sizes(
"audio", audio_feature_lengths, dim=1), "audio", audio_feature_lengths, dim=1),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment