Unverified commit cf348c8d, authored by Roger Wang and committed by GitHub
Browse files

[Bugfix] Fix HunyuanVL XD-RoPE (#29593)


Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: grider-transwithai <grider@transwith.ai>
parent a5abd1d3
...@@ -847,7 +847,7 @@ class HunYuanVLForConditionalGeneration( ...@@ -847,7 +847,7 @@ class HunYuanVLForConditionalGeneration(
.expand(-1, llm_grid_w + 1) .expand(-1, llm_grid_w + 1)
.reshape(-1) .reshape(-1)
) )
h_index[pos : pos + token_num] = 0 t_index[pos : pos + token_num] = image_index
if xd_num == 4: if xd_num == 4:
llm_positions = torch.stack([p_index, w_index, h_index, t_index]) llm_positions = torch.stack([p_index, w_index, h_index, t_index])
......
...@@ -195,9 +195,9 @@ class HunYuanVLImageProcessor(BaseImageProcessor): ...@@ -195,9 +195,9 @@ class HunYuanVLImageProcessor(BaseImageProcessor):
processed_images = [] processed_images = []
for image in images: for image in images:
if do_resize: if do_resize:
resized_width, resized_height = smart_resize( resized_height, resized_width = smart_resize(
width, height=height,
height, width=width,
factor=patch_size * merge_size, factor=patch_size * merge_size,
min_pixels=self.min_pixels, min_pixels=self.min_pixels,
max_pixels=self.max_pixels, max_pixels=self.max_pixels,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment