Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9279b9f8
Unverified
Commit
9279b9f8
authored
Jan 06, 2025
by
Roger Wang
Committed by
GitHub
Jan 06, 2025
Browse files
[Bugfix] Fix max image size for LLaVA-Onevision (#11769)
Signed-off-by:
Roger Wang
<
ywang@roblox.com
>
parent
ee77fdb5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
2 deletions
+18
-2
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/llava_onevision.py
+18
-2
No files found.
vllm/model_executor/models/llava_onevision.py
View file @
9279b9f8
...
...
@@ -19,8 +19,8 @@ from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
MultiModalKwargs
,
NestedTensors
from
vllm.multimodal.parse
import
(
MultiModalDataItems
,
VideoEmbeddingItems
,
VideoProcessorItems
)
from
vllm.multimodal.parse
import
(
ImageSize
,
MultiModalDataItems
,
VideoEmbeddingItems
,
VideoProcessorItems
)
from
vllm.multimodal.processing
import
MultiModalFieldConfig
,
PromptReplacement
from
vllm.multimodal.profiling
import
BaseProfilingInfo
,
ProcessorInputs
from
vllm.sequence
import
IntermediateTensors
...
...
@@ -170,6 +170,22 @@ class LlavaOnevisionProcessingMixin(LlavaNextProcessingMixin):
class
LlavaOnevisionProfilingInfo
(
LlavaOnevisionProcessingMixin
,
BaseLlavaProfilingInfo
):
def _get_image_size_with_most_features(self) -> ImageSize:
    """Pick the grid pinpoint that produces the most image tokens.

    Every ``(height, width)`` entry of the HF config's
    ``image_grid_pinpoints`` is passed to ``_get_num_image_tokens``;
    the first entry with the strictly largest token count wins.

    Returns:
        An ``ImageSize`` built from the winning pinpoint.

    Raises:
        ValueError: If no pinpoint yields a token count above zero
            (this includes an empty pinpoint list).
    """
    pinpoints = self._get_hf_config().image_grid_pinpoints

    best_tokens = 0
    best_size = None
    # Pinpoints are unpacked as (height, width), in that order.
    for grid_h, grid_w in pinpoints:
        num_tokens = self._get_num_image_tokens(
            image_width=grid_w,
            image_height=grid_h,
        )
        # Strict comparison: on a tie the earlier pinpoint is kept.
        if num_tokens > best_tokens:
            best_tokens, best_size = (
                num_tokens,
                ImageSize(width=grid_w, height=grid_h),
            )

    if best_size is None or best_tokens == 0:
        raise ValueError("Cannot have a largest feature size of 0!")

    return best_size
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
    """Return the per-modality limits for this model.

    The mapping has exactly two keys, ``"image"`` and ``"video"``,
    and both map to ``None``.
    """
    # NOTE(review): None presumably means "no fixed limit" -- confirm
    # against the base profiling-info contract.
    modalities = ("image", "video")
    return dict.fromkeys(modalities)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment