Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
205d84aa
Unverified
Commit
205d84aa
authored
Apr 19, 2025
by
Cyrus Leung
Committed by
GitHub
Apr 19, 2025
Browse files
[VLM] Clean up models (#16873)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
5124f5bf
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2 additions
and
42 deletions
+2
-42
examples/offline_inference/mistral-small.py
examples/offline_inference/mistral-small.py
+1
-0
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+1
-1
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm.py
+0
-23
vllm/model_executor/models/qwen2_5_omni_thinker.py
vllm/model_executor/models/qwen2_5_omni_thinker.py
+0
-18
No files found.
examples/offline_inference/mistral-small.py
View file @
205d84aa
...
...
@@ -62,6 +62,7 @@ def run_simple_demo(args: argparse.Namespace):
tokenizer_mode
=
"mistral"
if
args
.
format
==
"mistral"
else
"auto"
,
config_format
=
"mistral"
if
args
.
format
==
"mistral"
else
"auto"
,
load_format
=
"mistral"
if
args
.
format
==
"mistral"
else
"auto"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
max_model_len
=
4096
,
max_num_seqs
=
2
,
tensor_parallel_size
=
2
,
...
...
examples/offline_inference/vision_language.py
View file @
205d84aa
...
...
@@ -957,7 +957,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
"max_pixels"
:
1280
*
28
*
28
,
"fps"
:
[
1
],
},
disable_mm_preprocessor_cache
=
args
.
disable_mm_preprocessor_cache
,
limit_mm_per_prompt
=
{
"image"
:
1
}
,
)
if
modality
==
"image"
:
...
...
vllm/model_executor/models/phi4mm.py
View file @
205d84aa
...
...
@@ -503,26 +503,6 @@ class Phi4MMProcessingInfo(BaseProcessingInfo):
def
get_supported_mm_limits
(
self
)
->
Mapping
[
str
,
Optional
[
int
]]:
return
{
"audio"
:
None
,
"image"
:
None
}
def
get_mm_max_tokens_per_item
(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
)
->
Mapping
[
str
,
int
]:
return
{
"image"
:
self
.
get_max_image_tokens
(),
"audio"
:
self
.
get_max_audio_tokens
(),
}
def
get_max_audio_tokens
(
self
)
->
int
:
sr
=
self
.
get_feature_extractor
().
sampling_rate
num_frames
=
self
.
get_audio_num_frames
(
_AUDIO_MAX_SOUNDFILE_SIZE
,
sr
)
return
self
.
_compute_audio_embed_size
(
num_frames
)
def
get_max_image_tokens
(
self
)
->
int
:
target_width
,
target_height
=
self
.
get_image_size_with_most_features
()
return
self
.
get_num_image_tokens
(
image_width
=
target_width
,
image_height
=
target_height
)
def
_find_target_aspect_ratio
(
self
,
orig_width
:
int
,
...
...
@@ -764,9 +744,6 @@ class Phi4MMDummyInputsBuilder(BaseDummyInputsBuilder[Phi4MMProcessingInfo]):
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
target_width
,
target_height
=
\
self
.
info
.
get_image_size_with_most_features
()
target_width
,
target_height
=
\
self
.
info
.
get_image_size_with_most_features
()
...
...
vllm/model_executor/models/qwen2_5_omni_thinker.py
View file @
205d84aa
...
...
@@ -172,26 +172,9 @@ class Qwen2_5OmniThinkerProcessingInfo(Qwen2AudioProcessingInfo,
assert
isinstance
(
feature_extractor
,
WhisperFeatureExtractor
)
return
feature_extractor
def
get_max_audio_tokens
(
self
)
->
int
:
hf_config
=
self
.
get_hf_config
()
max_source_position
=
hf_config
.
audio_config
.
max_source_positions
output_lengths
=
(
max_source_position
-
2
)
//
2
+
1
return
output_lengths
def
get_supported_mm_limits
(
self
)
->
Mapping
[
str
,
Optional
[
int
]]:
return
{
"audio"
:
None
,
"image"
:
None
,
"video"
:
None
}
def
get_mm_max_tokens_per_item
(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
)
->
Mapping
[
str
,
int
]:
return
{
"audio"
:
self
.
get_max_audio_tokens
(),
"image"
:
self
.
get_max_image_tokens
(),
"video"
:
self
.
get_max_video_tokens
(
seq_len
,
mm_counts
),
}
class
Qwen2_5OmniThinkerDummyInputsBuilder
(
BaseDummyInputsBuilder
[
Qwen2_5OmniThinkerProcessingInfo
]):
...
...
@@ -210,7 +193,6 @@ class Qwen2_5OmniThinkerDummyInputsBuilder(
return
(
audio_token
*
num_audios
+
image_token
*
num_images
+
video_token
*
num_videos
)
# TODO: @abstractmethod after transition
def
get_dummy_mm_data
(
self
,
seq_len
:
int
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment