[fix] Cast Qwen2.5-VL image_embeds/video_embeds inputs to the dtype of `self.visual` (#21049)

Signed-off-by: h-avsha <avshalom.manevich@hcompany.ai>

[fix] fix qwen image_embeds input (#21049)
Signed-off-by: h-avsha <avshalom.manevich@hcompany.ai>
a0f8a796 · Avshalom Manevich · GitHub · 18bdcf41 · a0f8a796
Unverified commit a0f8a796, authored Jul 16, 2025 by Avshalom Manevich; committed by GitHub on Jul 16, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/model_executor/models/qwen2_5_vl.py vllm/model_executor/models/qwen2_5_vl.py +2 -2

No files found.
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -974,7 +974,7 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
        grid_thw_list = grid_thw.tolist()

        if image_input["type"] == "image_embeds":
-            image_embeds = image_input["image_embeds"]
+            image_embeds = image_input["image_embeds"].type(self.visual.dtype)
        else:
            pixel_values = image_input["pixel_values"]
            image_embeds = self.visual(pixel_values, grid_thw=grid_thw_list)
@@ -994,7 +994,7 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
        grid_thw_list = grid_thw.tolist()

        if video_input["type"] == "video_embeds":
-            video_embeds = video_input["video_embeds"]
+            video_embeds = video_input["video_embeds"].type(self.visual.dtype)
        else:
            pixel_values_videos = video_input["pixel_values_videos"]
            video_embeds = self.visual(pixel_values_videos,