Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
40a87562
Unverified
Commit
40a87562
authored
Dec 27, 2025
by
Isotr0py
Committed by
GitHub
Dec 27, 2025
Browse files
[Chore]: Remove HF format Phi4-MM examples (#31405)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
3d024985
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
0 additions
and
103 deletions
+0
-103
examples/offline_inference/audio_language.py
examples/offline_inference/audio_language.py
+0
-32
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+0
-36
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+0
-35
No files found.
examples/offline_inference/audio_language.py
View file @
40a87562
...
...
@@ -213,37 +213,6 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData:
)
def run_phi4_multimodal(question: str, audio_count: int) -> ModelRequestData:
    """
    Build the request data for Phi-4-multimodal-instruct with audio inputs.

    Phi-4-multimodal-instruct accepts both image and audio inputs; this
    example covers the audio case only.

    Args:
        question: Text appended after the audio placeholders in the prompt.
        audio_count: Number of ``<|audio|>`` placeholders to emit; also used
            as the per-prompt audio limit passed to the engine.

    Returns:
        A ``ModelRequestData`` holding the engine arguments, the prompt and
        the speech LoRA request.
    """
    # The snapshot is fetched at revision "refs/pr/70".
    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )

    # The vision-lora and speech-lora weights ship alongside the base model,
    # so the adapter directory has to be spelled out by hand.
    speech_adapter_dir = os.path.join(snapshot_dir, "speech-lora")

    # One placeholder per audio clip, immediately followed by the question.
    audio_tags = "<|audio|>" * audio_count
    chat_prompt = f"<|user|>{audio_tags}{question}<|end|><|assistant|>"

    return ModelRequestData(
        engine_args=EngineArgs(
            model=snapshot_dir,
            max_model_len=12800,
            max_num_seqs=2,
            enable_lora=True,
            max_lora_rank=320,
            limit_mm_per_prompt={"audio": audio_count},
        ),
        prompt=chat_prompt,
        lora_requests=[LoRARequest("speech", 1, speech_adapter_dir)],
    )
# Qwen2-Audio
def
run_qwen2_audio
(
question
:
str
,
audio_count
:
int
)
->
ModelRequestData
:
model_name
=
"Qwen/Qwen2-Audio-7B-Instruct"
...
...
@@ -416,7 +385,6 @@ model_example_map = {
"midashenglm"
:
run_midashenglm
,
"minicpmo"
:
run_minicpmo
,
"phi4_mm"
:
run_phi4mm
,
"phi4_multimodal"
:
run_phi4_multimodal
,
"qwen2_audio"
:
run_qwen2_audio
,
"qwen2_5_omni"
:
run_qwen2_5_omni
,
"ultravox"
:
run_ultravox
,
...
...
examples/offline_inference/vision_language.py
View file @
40a87562
...
...
@@ -1424,41 +1424,6 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
)
# HF format Phi-4-multimodal-instruct
def run_phi4_multimodal(questions: list[str], modality: str) -> ModelRequestData:
    """
    Build the request data for Phi-4-multimodal-instruct with image inputs.

    Phi-4-multimodal-instruct accepts both image and audio inputs; this
    example covers the single-image case only.

    Args:
        questions: Questions to ask; one prompt is produced per question.
        modality: Must be ``"image"`` (enforced with an ``assert``).

    Returns:
        A ``ModelRequestData`` holding the engine arguments, the prompts and
        the vision LoRA request.
    """
    assert modality == "image"

    # The snapshot is fetched at revision "refs/pr/70".
    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )

    # The vision-lora and speech-lora weights ship alongside the base model,
    # so the adapter directory has to be spelled out by hand.
    vision_adapter_dir = os.path.join(snapshot_dir, "vision-lora")

    # Each prompt carries exactly one image placeholder ahead of the question.
    chat_prompts = [
        f"<|user|><|image|>{item}<|end|><|assistant|>" for item in questions
    ]

    return ModelRequestData(
        engine_args=EngineArgs(
            model=snapshot_dir,
            max_model_len=5120,
            max_num_seqs=2,
            max_num_batched_tokens=12800,
            enable_lora=True,
            max_lora_rank=320,
            # Note - mm_processor_kwargs can also be passed to
            # generate/chat calls
            mm_processor_kwargs={"dynamic_hd": 16},
            limit_mm_per_prompt={"image": 1},
        ),
        prompts=chat_prompts,
        lora_requests=[LoRARequest("vision", 1, vision_adapter_dir)],
    )
# Pixtral HF-format
def
run_pixtral_hf
(
questions
:
list
[
str
],
modality
:
str
)
->
ModelRequestData
:
assert
modality
==
"image"
...
...
@@ -1904,7 +1869,6 @@ model_example_map = {
"paligemma2"
:
run_paligemma2
,
"phi3_v"
:
run_phi3v
,
"phi4_mm"
:
run_phi4mm
,
"phi4_multimodal"
:
run_phi4_multimodal
,
"pixtral_hf"
:
run_pixtral_hf
,
"qwen_vl"
:
run_qwen_vl
,
"qwen2_vl"
:
run_qwen2_vl
,
...
...
examples/offline_inference/vision_language_multi_image.py
View file @
40a87562
...
...
@@ -932,40 +932,6 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData:
)
def load_phi4_multimodal(question: str, image_urls: list[str]) -> ModelRequestData:
    """
    Build the request data for Phi-4-multimodal-instruct with several images.

    Phi-4-multimodal-instruct accepts both image and audio inputs; this
    example covers the multi-image case.

    Args:
        question: Text appended after the image placeholders in the prompt.
        image_urls: URLs of the images; each one is fetched with
            ``fetch_image`` and gets its own ``<|image|>`` placeholder.

    Returns:
        A ``ModelRequestData`` holding the engine arguments, the prompt, the
        fetched images and the vision LoRA request.
    """
    # The snapshot is fetched at revision "refs/pr/70".
    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )

    # The vision-lora and speech-lora weights ship alongside the base model,
    # so the adapter directory has to be spelled out by hand.
    vision_adapter_dir = os.path.join(snapshot_dir, "vision-lora")

    # The per-prompt image limit and the placeholder count both follow the
    # number of URLs supplied.
    image_total = len(image_urls)

    engine_config = EngineArgs(
        model=snapshot_dir,
        max_model_len=4096,
        max_num_seqs=2,
        limit_mm_per_prompt={"image": image_total},
        enable_lora=True,
        max_lora_rank=320,
        # Note - mm_processor_kwargs can also be passed to generate/chat calls
        mm_processor_kwargs={"dynamic_hd": 4},
    )

    image_tags = "<|image|>" * image_total
    chat_prompt = f"<|user|>{image_tags}{question}<|end|><|assistant|>"

    fetched_images = [fetch_image(link) for link in image_urls]

    return ModelRequestData(
        engine_args=engine_config,
        prompt=chat_prompt,
        image_data=fetched_images,
        lora_requests=[LoRARequest("vision", 1, vision_adapter_dir)],
    )
def
load_qwen_vl_chat
(
question
:
str
,
image_urls
:
list
[
str
])
->
ModelRequestData
:
model_name
=
"Qwen/Qwen-VL-Chat"
engine_args
=
EngineArgs
(
...
...
@@ -1363,7 +1329,6 @@ model_example_map = {
"paddleocr_vl"
:
load_paddleocr_vl
,
"phi3_v"
:
load_phi3v
,
"phi4_mm"
:
load_phi4mm
,
"phi4_multimodal"
:
load_phi4_multimodal
,
"pixtral_hf"
:
load_pixtral_hf
,
"qwen_vl_chat"
:
load_qwen_vl_chat
,
"qwen2_vl"
:
load_qwen2_vl
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment