[Bugfix] Fix HunyuanVL XD-RoPE (#29593)

Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: grider-transwithai <grider@transwith.ai>

[Bugfix] Fix HunyuanVL XD-RoPE (#29593)
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: grider-transwithai <grider@transwith.ai>
cf348c8d · Roger Wang · GitHub · a5abd1d3 · cf348c8d · cf348c8d
Unverified commit cf348c8d, authored Nov 27, 2025 by Roger Wang; committed by GitHub on Nov 27, 2025.
Showing with 4 additions and 4 deletions

vllm/model_executor/models/hunyuan_vision.py vllm/model_executor/models/hunyuan_vision.py +1 -1

vllm/transformers_utils/processors/hunyuan_vl_image.py vllm/transformers_utils/processors/hunyuan_vl_image.py +3 -3

No files found.
--- a/vllm/model_executor/models/hunyuan_vision.py
+++ b/vllm/model_executor/models/hunyuan_vision.py
@@ -847,7 +847,7 @@ class HunYuanVLForConditionalGeneration(
                .expand(-1, llm_grid_w + 1)
                .reshape(-1)
            )
-            h_index[pos : pos + token_num] = 0
+            t_index[pos : pos + token_num] = image_index
        if xd_num == 4:
            llm_positions = torch.stack([p_index, w_index, h_index, t_index])

--- a/vllm/transformers_utils/processors/hunyuan_vl_image.py
+++ b/vllm/transformers_utils/processors/hunyuan_vl_image.py
@@ -195,9 +195,9 @@ class HunYuanVLImageProcessor(BaseImageProcessor):
        processed_images = []
        for image in images:
            if do_resize:
-                resized_width, resized_height = smart_resize(
+                resized_height, resized_width = smart_resize(
-                    width,
+                    height=height,
-                    height,
+                    width=width,
                    factor=patch_size * merge_size,
                    min_pixels=self.min_pixels,
                    max_pixels=self.max_pixels,