Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0a995d54
Unverified
Commit
0a995d54
authored
Mar 04, 2025
by
Congcong Chen
Committed by
GitHub
Mar 04, 2025
Browse files
[Model] New model support for Phi-4-multimodal-instruct (#14119)
parent
ade3f7d9
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
7159 additions
and
3 deletions
+7159
-3
docs/source/models/supported_models.md
docs/source/models/supported_models.md
+8
-1
requirements-common.txt
requirements-common.txt
+1
-0
tests/models/registry.py
tests/models/registry.py
+2
-0
vllm/config.py
vllm/config.py
+2
-2
vllm/entrypoints/chat_utils.py
vllm/entrypoints/chat_utils.py
+4
-0
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm.py
+1803
-0
vllm/model_executor/models/phi4mm_audio.py
vllm/model_executor/models/phi4mm_audio.py
+1403
-0
vllm/model_executor/models/phi4mm_utils.py
vllm/model_executor/models/phi4mm_utils.py
+1969
-0
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
vllm/model_executor/models/vision_siglip_navit.py
vllm/model_executor/models/vision_siglip_navit.py
+1966
-0
No files found.
docs/source/models/supported_models.md
View file @
0a995d54
...
@@ -410,7 +410,7 @@ See [this page](#generative-models) for more information on how to use generativ
...
@@ -410,7 +410,7 @@ See [this page](#generative-models) for more information on how to use generativ
*
✅︎
*
✅︎
-
*
`Phi3ForCausalLM`
-
*
`Phi3ForCausalLM`
*
Phi-4, Phi-3
*
Phi-4, Phi-3
*
`microsoft/Phi-4`
,
`microsoft/Phi-3-mini-4k-instruct`
,
`microsoft/Phi-3-mini-128k-instruct`
,
`microsoft/Phi-3-medium-128k-instruct`
, etc.
*
`microsoft/Phi-4-mini-instruct`
,
`microsoft/Phi-4`
,
`microsoft/Phi-3-mini-4k-instruct`
,
`microsoft/Phi-3-mini-128k-instruct`
,
`microsoft/Phi-3-medium-128k-instruct`
, etc.
*
✅︎
*
✅︎
*
✅︎
*
✅︎
-
*
`Phi3SmallForCausalLM`
-
*
`Phi3SmallForCausalLM`
...
@@ -856,6 +856,13 @@ See [this page](#generative-models) for more information on how to use generativ
...
@@ -856,6 +856,13 @@ See [this page](#generative-models) for more information on how to use generativ
*
*
*
✅︎
*
✅︎
*
✅︎
*
✅︎
-
*
`Phi4MMForCausalLM`
*
Phi-4-multimodal
*
T + I
<sup>
+
</sup>
/ T + A
<sup>
+
</sup>
/ I
<sup>
+
</sup>
+ A
<sup>
+
</sup>
*
`microsoft/Phi-4-multimodal-instruct`
, etc.
*
✅︎
*
*
-
*
`PixtralForConditionalGeneration`
-
*
`PixtralForConditionalGeneration`
*
Pixtral
*
Pixtral
*
T + I
<sup>
+
</sup>
*
T + I
<sup>
+
</sup>
...
...
requirements-common.txt
View file @
0a995d54
...
@@ -37,3 +37,4 @@ depyf==0.18.0 # required for profiling and debugging with compilation config
...
@@ -37,3 +37,4 @@ depyf==0.18.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files
watchfiles # required for http server to monitor the updates of TLS files
python-json-logger # Used by logging as per examples/other/logging_configuration.md
python-json-logger # Used by logging as per examples/other/logging_configuration.md
scipy # Required for phi-4-multimodal-instruct
\ No newline at end of file
tests/models/registry.py
View file @
0a995d54
...
@@ -272,6 +272,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -272,6 +272,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
extras
=
{
"v2"
:
"google/paligemma2-3b-ft-docci-448"
}),
# noqa: E501
extras
=
{
"v2"
:
"google/paligemma2-3b-ft-docci-448"
}),
# noqa: E501
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3-vision-128k-instruct"
,
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3-vision-128k-instruct"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"Phi4MMForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-4-multimodal-instruct"
,
trust_remote_code
=
True
),
"PixtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Pixtral-12B-2409"
,
# noqa: E501
"PixtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Pixtral-12B-2409"
,
# noqa: E501
tokenizer_mode
=
"mistral"
),
tokenizer_mode
=
"mistral"
),
"QwenVLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen-VL"
,
"QwenVLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen-VL"
,
...
...
vllm/config.py
View file @
0a995d54
...
@@ -2284,9 +2284,9 @@ class LoRAConfig:
...
@@ -2284,9 +2284,9 @@ class LoRAConfig:
return
hash_str
return
hash_str
def
__post_init__
(
self
):
def
__post_init__
(
self
):
# Setting the maximum rank to
256
should be able to satisfy the vast
# Setting the maximum rank to
512
should be able to satisfy the vast
# majority of applications.
# majority of applications.
possible_max_ranks
=
(
8
,
16
,
32
,
64
,
128
,
256
)
possible_max_ranks
=
(
8
,
16
,
32
,
64
,
128
,
256
,
320
,
512
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
raise
ValueError
(
raise
ValueError
(
...
...
vllm/entrypoints/chat_utils.py
View file @
0a995d54
...
@@ -395,6 +395,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
...
@@ -395,6 +395,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
if
model_type
==
"phi3_v"
:
if
model_type
==
"phi3_v"
:
# Workaround since this token is not defined in the tokenizer
# Workaround since this token is not defined in the tokenizer
return
f
"<|image_
{
current_count
}
|>"
return
f
"<|image_
{
current_count
}
|>"
if
model_type
==
"phi4mm"
:
return
"<|endoftext10|>"
# 200010 (see vocab.json in hf model)
if
model_type
in
(
"minicpmo"
,
"minicpmv"
):
if
model_type
in
(
"minicpmo"
,
"minicpmv"
):
return
"(<image>./</image>)"
return
"(<image>./</image>)"
if
model_type
in
(
"blip-2"
,
"chatglm"
,
"fuyu"
,
"paligemma"
,
if
model_type
in
(
"blip-2"
,
"chatglm"
,
"fuyu"
,
"paligemma"
,
...
@@ -424,6 +426,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
...
@@ -424,6 +426,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
elif
modality
==
"audio"
:
elif
modality
==
"audio"
:
if
model_type
==
"ultravox"
:
if
model_type
==
"ultravox"
:
return
"<|audio|>"
return
"<|audio|>"
if
model_type
==
"phi4mm"
:
return
"<|endoftext11|>"
# 200011 (see vocab.json in hf model)
if
model_type
==
"qwen2_audio"
:
if
model_type
==
"qwen2_audio"
:
return
(
f
"Audio
{
current_count
}
: "
return
(
f
"Audio
{
current_count
}
: "
f
"<|audio_bos|><|AUDIO|><|audio_eos|>"
)
f
"<|audio_bos|><|AUDIO|><|audio_eos|>"
)
...
...
vllm/model_executor/models/phi4mm.py
0 → 100644
View file @
0a995d54
This diff is collapsed.
Click to expand it.
vllm/model_executor/models/phi4mm_audio.py
0 → 100644
View file @
0a995d54
This diff is collapsed.
Click to expand it.
vllm/model_executor/models/phi4mm_utils.py
0 → 100644
View file @
0a995d54
This diff is collapsed.
Click to expand it.
vllm/model_executor/models/registry.py
View file @
0a995d54
...
@@ -182,6 +182,7 @@ _MULTIMODAL_MODELS = {
...
@@ -182,6 +182,7 @@ _MULTIMODAL_MODELS = {
"Qwen2_5_VLForConditionalGeneration"
:
(
"qwen2_5_vl"
,
"Qwen2_5_VLForConditionalGeneration"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
(
"qwen2_5_vl"
,
"Qwen2_5_VLForConditionalGeneration"
),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
(
"qwen2_audio"
,
"Qwen2AudioForConditionalGeneration"
),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
(
"qwen2_audio"
,
"Qwen2AudioForConditionalGeneration"
),
# noqa: E501
"UltravoxModel"
:
(
"ultravox"
,
"UltravoxModel"
),
"UltravoxModel"
:
(
"ultravox"
,
"UltravoxModel"
),
"Phi4MMForCausalLM"
:
(
"phi4mm"
,
"Phi4MMForCausalLM"
),
# [Encoder-decoder]
# [Encoder-decoder]
"Florence2ForConditionalGeneration"
:
(
"florence2"
,
"Florence2ForConditionalGeneration"
),
# noqa: E501
"Florence2ForConditionalGeneration"
:
(
"florence2"
,
"Florence2ForConditionalGeneration"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
(
"mllama"
,
"MllamaForConditionalGeneration"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
(
"mllama"
,
"MllamaForConditionalGeneration"
),
# noqa: E501
...
...
vllm/model_executor/models/vision_siglip_navit.py
0 → 100644
View file @
0a995d54
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment