"vllm/vscode:/vscode.git/clone" did not exist on "fc5ebbd1d3453461ea6e00a78faf87c41d1aa625"
Unverified commit ba214dff, authored by Cyrus Leung and committed by GitHub
Browse files

[Bugfix] Fix precision error in LLaVA-NeXT (#11735)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent eed11ebe
...@@ -15,10 +15,9 @@ def processor_for_llava_next(): ...@@ -15,10 +15,9 @@ def processor_for_llava_next():
return LlavaNextMultiModalProcessor return LlavaNextMultiModalProcessor
# FIXME: image_size [(198, 176), (176, 198)]
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"]) @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
@pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488), @pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
(488, 183)]) (488, 183), (198, 176), (176, 198)])
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
def test_processor_prompt_replacements( def test_processor_prompt_replacements(
processor_for_llava_next, processor_for_llava_next,
......
...@@ -2,6 +2,7 @@ from functools import cached_property ...@@ -2,6 +2,7 @@ from functools import cached_property
from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple, from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple,
TypedDict, Union) TypedDict, Union)
import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
from transformers import BatchFeature, LlavaNextConfig, LlavaNextProcessor from transformers import BatchFeature, LlavaNextConfig, LlavaNextProcessor
...@@ -139,16 +140,21 @@ class LlavaNextMultiModalProcessor(LlavaMultiModalProcessor): ...@@ -139,16 +140,21 @@ class LlavaNextMultiModalProcessor(LlavaMultiModalProcessor):
current_height = npatches * num_patch_height current_height = npatches * num_patch_height
current_width = npatches * num_patch_width current_width = npatches * num_patch_width
original_aspect_ratio = original_width / original_height # NOTE: HF resizes based on float32
current_aspect_ratio = current_width / current_height original_aspect_ratio = np.array(original_width / original_height,
dtype=np.float32)
current_aspect_ratio = np.array(current_width / current_height,
dtype=np.float32)
if original_aspect_ratio > current_aspect_ratio: if original_aspect_ratio > current_aspect_ratio:
scale_factor = current_width / original_width scale_factor = np.array(current_width / original_width,
dtype=np.float32)
new_height = int(original_height * scale_factor) new_height = int(original_height * scale_factor)
padding = (current_height - new_height) // 2 padding = (current_height - new_height) // 2
current_height -= 2 * padding current_height -= 2 * padding
else: else:
scale_factor = current_height / original_height scale_factor = np.array(current_height / original_height,
dtype=np.float32)
new_width = int(original_width * scale_factor) new_width = int(original_width * scale_factor)
padding = (current_width - new_width) // 2 padding = (current_width - new_width) // 2
current_width -= 2 * padding current_width -= 2 * padding
......
...@@ -3,6 +3,7 @@ from functools import cached_property ...@@ -3,6 +3,7 @@ from functools import cached_property
from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple, from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple,
TypedDict, Union) TypedDict, Union)
import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
from transformers import (BatchFeature, LlavaOnevisionConfig, from transformers import (BatchFeature, LlavaOnevisionConfig,
...@@ -127,18 +128,24 @@ class LlavaOnevisionMultiModalProcessor(LlavaNextMultiModalProcessor): ...@@ -127,18 +128,24 @@ class LlavaOnevisionMultiModalProcessor(LlavaNextMultiModalProcessor):
current_height = npatches * num_patch_height current_height = npatches * num_patch_height
current_width = npatches * num_patch_width current_width = npatches * num_patch_width
original_aspect_ratio = original_width / original_height # NOTE: HF resizes based on float32
current_aspect_ratio = current_width / current_height original_aspect_ratio = np.array(original_width / original_height,
dtype=np.float32)
current_aspect_ratio = np.array(current_width / current_height,
dtype=np.float32)
if original_aspect_ratio > current_aspect_ratio: if original_aspect_ratio > current_aspect_ratio:
new_height = int(original_height * scale_factor = np.array(current_width / original_width,
(current_width / original_width)) dtype=np.float32)
new_height = int(original_height * scale_factor)
padding = (current_height - new_height) // 2 padding = (current_height - new_height) // 2
current_height -= padding * 2 current_height -= 2 * padding
else: else:
new_width = int(original_width * scale_factor = np.array(current_height / original_height,
(current_height / original_height)) dtype=np.float32)
new_width = int(original_width * scale_factor)
padding = (current_width - new_width) // 2 padding = (current_width - new_width) // 2
current_width -= padding * 2 current_width -= 2 * padding
unpadded_features = current_height * current_width unpadded_features = current_height * current_width
newline_features = current_height newline_features = current_height
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment