Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
84149043
Unverified
Commit
84149043
authored
Aug 27, 2025
by
Isotr0py
Committed by
GitHub
Aug 27, 2025
Browse files
[Model] Enable native HF format InternVL support (#23742)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent
3af47c3c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing 4 changed files with 18 additions and 16 deletions
+18
-16
docs/models/supported_models.md
docs/models/supported_models.md
+1
-0
tests/models/multimodal/generation/test_common.py
tests/models/multimodal/generation/test_common.py
+14
-15
tests/models/registry.py
tests/models/registry.py
+2
-1
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
No files found.
docs/models/supported_models.md
View file @
84149043
...
@@ -629,6 +629,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
...
@@ -629,6 +629,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
|
`Idefics3ForConditionalGeneration`
| Idefics3 | T + I |
`HuggingFaceM4/Idefics3-8B-Llama3`
, etc. | ✅︎ | | ✅︎ |
|
`Idefics3ForConditionalGeneration`
| Idefics3 | T + I |
`HuggingFaceM4/Idefics3-8B-Llama3`
, etc. | ✅︎ | | ✅︎ |
|
`InternS1ForConditionalGeneration`
| Intern-S1 | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
|
`internlm/Intern-S1`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`InternS1ForConditionalGeneration`
| Intern-S1 | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
|
`internlm/Intern-S1`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`InternVLChatModel`
| InternVL 3.5, InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I
<sup>
E+
</sup>
+ (V
<sup>
E+
</sup>
) |
`OpenGVLab/InternVL3_5-14B`
,
`OpenGVLab/InternVL3-9B`
,
`OpenGVLab/InternVideo2_5_Chat_8B`
,
`OpenGVLab/InternVL2_5-4B`
,
`OpenGVLab/Mono-InternVL-2B`
,
`OpenGVLab/InternVL2-4B`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`InternVLChatModel`
| InternVL 3.5, InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I
<sup>
E+
</sup>
+ (V
<sup>
E+
</sup>
) |
`OpenGVLab/InternVL3_5-14B`
,
`OpenGVLab/InternVL3-9B`
,
`OpenGVLab/InternVideo2_5_Chat_8B`
,
`OpenGVLab/InternVL2_5-4B`
,
`OpenGVLab/Mono-InternVL-2B`
,
`OpenGVLab/InternVL2-4B`
, etc. | ✅︎ | ✅︎ | ✅︎ |
| `InternVLForConditionalGeneration` | InternVL 3.0 (HF format) | T + I<sup>E+</sup> + V<sup>E+</sup> | `OpenGVLab/InternVL3-1B-hf`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`KeyeForConditionalGeneration`
| Keye-VL-8B-Preview | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
|
`Kwai-Keye/Keye-VL-8B-Preview`
| | | ✅︎ |
|
`KeyeForConditionalGeneration`
| Keye-VL-8B-Preview | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
|
`Kwai-Keye/Keye-VL-8B-Preview`
| | | ✅︎ |
|
`KimiVLForConditionalGeneration`
| Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking | T + I
<sup>
+
</sup>
|
`moonshotai/Kimi-VL-A3B-Instruct`
,
`moonshotai/Kimi-VL-A3B-Thinking`
| | ✅︎ | ✅︎ |
|
`KimiVLForConditionalGeneration`
| Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking | T + I
<sup>
+
</sup>
|
`moonshotai/Kimi-VL-A3B-Instruct`
,
`moonshotai/Kimi-VL-A3B-Thinking`
| | ✅︎ | ✅︎ |
|
`Llama4ForConditionalGeneration`
| Llama 4 | T + I
<sup>
+
</sup>
|
`meta-llama/Llama-4-Scout-17B-16E-Instruct`
,
`meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`
,
`meta-llama/Llama-4-Maverick-17B-128E-Instruct`
, etc. | | ✅︎ | ✅︎ |
|
`Llama4ForConditionalGeneration`
| Llama 4 | T + I
<sup>
+
</sup>
|
`meta-llama/Llama-4-Scout-17B-16E-Instruct`
,
`meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`
,
`meta-llama/Llama-4-Maverick-17B-128E-Instruct`
, etc. | | ✅︎ | ✅︎ |
...
...
tests/models/multimodal/generation/test_common.py
View file @
84149043
...
@@ -222,21 +222,6 @@ VLM_TEST_SETTINGS = {
...
@@ -222,21 +222,6 @@ VLM_TEST_SETTINGS = {
},
},
marks
=
[
large_gpu_mark
(
min_gb
=
32
)],
marks
=
[
large_gpu_mark
(
min_gb
=
32
)],
),
),
# Check "auto" with fallback to transformers
"internvl-transformers"
:
VLMTestInfo
(
models
=
[
"OpenGVLab/InternVL3-1B-hf"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>Assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<IMG_CONTEXT>"
,
max_model_len
=
4096
,
use_tokenizer_eos
=
True
,
image_size_factors
=
[(
0.25
,
0.5
,
1.0
)],
vllm_runner_kwargs
=
{
"model_impl"
:
"auto"
,
},
auto_cls
=
AutoModelForImageTextToText
,
marks
=
[
pytest
.
mark
.
core_model
],
),
#### Extended model tests
#### Extended model tests
"aria"
:
VLMTestInfo
(
"aria"
:
VLMTestInfo
(
models
=
[
"rhymes-ai/Aria"
],
models
=
[
"rhymes-ai/Aria"
],
...
@@ -461,6 +446,20 @@ VLM_TEST_SETTINGS = {
...
@@ -461,6 +446,20 @@ VLM_TEST_SETTINGS = {
use_tokenizer_eos
=
True
,
use_tokenizer_eos
=
True
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
),
),
"intern_vl-hf"
:
VLMTestInfo
(
models
=
[
"OpenGVLab/InternVL3-1B-hf"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
,
VLMTestType
.
VIDEO
,
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>Assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<IMG_CONTEXT>"
,
video_idx_to_prompt
=
lambda
idx
:
"<video>"
,
max_model_len
=
8192
,
use_tokenizer_eos
=
True
,
auto_cls
=
AutoModelForImageTextToText
,
),
"kimi_vl"
:
VLMTestInfo
(
"kimi_vl"
:
VLMTestInfo
(
models
=
[
"moonshotai/Kimi-VL-A3B-Instruct"
],
models
=
[
"moonshotai/Kimi-VL-A3B-Instruct"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
...
...
tests/models/registry.py
View file @
84149043
...
@@ -429,6 +429,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -429,6 +429,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"3.5-qwen3moe"
:
"OpenGVLab/InternVL3_5-30B-A3B"
,
# noqa: E501
"3.5-qwen3moe"
:
"OpenGVLab/InternVL3_5-30B-A3B"
,
# noqa: E501
"3.5-gptoss"
:
"OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"
},
# noqa: E501
"3.5-gptoss"
:
"OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"
},
# noqa: E501
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"InternVLForConditionalGeneration"
:
_HfExamplesInfo
(
"OpenGVLab/InternVL3-1B-hf"
),
# noqa: E501
"KeyeForConditionalGeneration"
:
_HfExamplesInfo
(
"Kwai-Keye/Keye-VL-8B-Preview"
,
# noqa: E501
"KeyeForConditionalGeneration"
:
_HfExamplesInfo
(
"Kwai-Keye/Keye-VL-8B-Preview"
,
# noqa: E501
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"KimiVLForConditionalGeneration"
:
_HfExamplesInfo
(
"moonshotai/Kimi-VL-A3B-Instruct"
,
# noqa: E501
"KimiVLForConditionalGeneration"
:
_HfExamplesInfo
(
"moonshotai/Kimi-VL-A3B-Instruct"
,
# noqa: E501
...
@@ -584,7 +585,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
...
@@ -584,7 +585,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
_TRANSFORMERS_BACKEND_MODELS
=
{
_TRANSFORMERS_BACKEND_MODELS
=
{
"TransformersModel"
:
_HfExamplesInfo
(
"Qwen/Qwen3-Embedding-0.6B"
),
"TransformersModel"
:
_HfExamplesInfo
(
"Qwen/Qwen3-Embedding-0.6B"
),
"TransformersForCausalLM"
:
_HfExamplesInfo
(
"hmellor/Ilama-3.2-1B"
,
trust_remote_code
=
True
),
# noqa: E501
"TransformersForCausalLM"
:
_HfExamplesInfo
(
"hmellor/Ilama-3.2-1B"
,
trust_remote_code
=
True
),
# noqa: E501
"TransformersForMultimodalLM"
:
_HfExamplesInfo
(
"
OpenGVLab/InternVL3-1B
-hf"
),
"TransformersForMultimodalLM"
:
_HfExamplesInfo
(
"
BAAI/Emu3-Chat
-hf"
),
}
}
_EXAMPLE_MODELS
=
{
_EXAMPLE_MODELS
=
{
...
...
vllm/model_executor/models/registry.py
View file @
84149043
...
@@ -220,6 +220,7 @@ _MULTIMODAL_MODELS = {
...
@@ -220,6 +220,7 @@ _MULTIMODAL_MODELS = {
"H2OVLChatModel"
:
(
"h2ovl"
,
"H2OVLChatModel"
),
"H2OVLChatModel"
:
(
"h2ovl"
,
"H2OVLChatModel"
),
"InternVLChatModel"
:
(
"internvl"
,
"InternVLChatModel"
),
"InternVLChatModel"
:
(
"internvl"
,
"InternVLChatModel"
),
"InternS1ForConditionalGeneration"
:
(
"interns1"
,
"InternS1ForConditionalGeneration"
),
# noqa: E501
"InternS1ForConditionalGeneration"
:
(
"interns1"
,
"InternS1ForConditionalGeneration"
),
# noqa: E501
"InternVLForConditionalGeneration"
:
(
"interns1"
,
"InternS1ForConditionalGeneration"
),
# noqa: E501
"Idefics3ForConditionalGeneration"
:(
"idefics3"
,
"Idefics3ForConditionalGeneration"
),
"Idefics3ForConditionalGeneration"
:(
"idefics3"
,
"Idefics3ForConditionalGeneration"
),
"SmolVLMForConditionalGeneration"
:
(
"smolvlm"
,
"SmolVLMForConditionalGeneration"
),
# noqa: E501
"SmolVLMForConditionalGeneration"
:
(
"smolvlm"
,
"SmolVLMForConditionalGeneration"
),
# noqa: E501
"KeyeForConditionalGeneration"
:
(
"keye"
,
"KeyeForConditionalGeneration"
),
"KeyeForConditionalGeneration"
:
(
"keye"
,
"KeyeForConditionalGeneration"
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment