Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fe2016de
Unverified
Commit
fe2016de
authored
Oct 23, 2025
by
Cyrus Leung
Committed by
GitHub
Oct 23, 2025
Browse files
[CI/Build] Remove unnecessary flags from test registry (#27353)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
237cf6d3
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
89 additions
and
123 deletions
+89
-123
docs/models/supported_models.md
docs/models/supported_models.md
+2
-2
tests/distributed/test_pipeline_parallel.py
tests/distributed/test_pipeline_parallel.py
+9
-3
tests/distributed/test_sequence_parallel.py
tests/distributed/test_sequence_parallel.py
+9
-3
tests/entrypoints/openai/test_chat_template.py
tests/entrypoints/openai/test_chat_template.py
+3
-1
tests/entrypoints/test_chat_utils.py
tests/entrypoints/test_chat_utils.py
+12
-4
tests/models/multimodal/generation/vlm_utils/core.py
tests/models/multimodal/generation/vlm_utils/core.py
+3
-2
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+3
-1
tests/models/multimodal/processing/test_tensor_schema.py
tests/models/multimodal/processing/test_tensor_schema.py
+3
-1
tests/models/multimodal/test_mapping.py
tests/models/multimodal/test_mapping.py
+3
-1
tests/models/registry.py
tests/models/registry.py
+30
-71
tests/models/test_initialization.py
tests/models/test_initialization.py
+4
-6
tests/models/utils.py
tests/models/utils.py
+3
-1
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+5
-27
No files found.
docs/models/supported_models.md
View file @
fe2016de
...
@@ -374,8 +374,8 @@ th {
...
@@ -374,8 +374,8 @@ th {
|
`GraniteMoeSharedForCausalLM`
| Granite MoE Shared |
`ibm-research/moe-7b-1b-active-shared-experts`
(test model) | ✅︎ | ✅︎ |
|
`GraniteMoeSharedForCausalLM`
| Granite MoE Shared |
`ibm-research/moe-7b-1b-active-shared-experts`
(test model) | ✅︎ | ✅︎ |
|
`GritLM`
| GritLM |
`parasail-ai/GritLM-7B-vllm`
. | ✅︎ | ✅︎ |
|
`GritLM`
| GritLM |
`parasail-ai/GritLM-7B-vllm`
. | ✅︎ | ✅︎ |
|
`Grok1ModelForCausalLM`
| Grok1 |
`hpcai-tech/grok-1`
. | ✅︎ | ✅︎ |
|
`Grok1ModelForCausalLM`
| Grok1 |
`hpcai-tech/grok-1`
. | ✅︎ | ✅︎ |
|
`HunYuanDenseV1ForCausalLM`
| Hunyuan
-7B-Instruct-0124
|
`tencent/Hunyuan-7B-Instruct
-0124
`
| ✅︎ | ✅︎ |
|
`HunYuanDenseV1ForCausalLM`
| Hunyuan
Dense
|
`tencent/Hunyuan-7B-Instruct`
| ✅︎ | ✅︎ |
|
`HunYuanMoEV1ForCausalLM`
| Hunyuan-
80B-
A13B |
`tencent/Hunyuan-A13B-Instruct`
,
`tencent/Hunyuan-A13B-Pretrain`
,
`tencent/Hunyuan-A13B-Instruct-FP8`
, etc. | ✅︎ | ✅︎ |
|
`HunYuanMoEV1ForCausalLM`
| Hunyuan-A13B |
`tencent/Hunyuan-A13B-Instruct`
,
`tencent/Hunyuan-A13B-Pretrain`
,
`tencent/Hunyuan-A13B-Instruct-FP8`
, etc. | ✅︎ | ✅︎ |
|
`HCXVisionForCausalLM`
| HyperCLOVAX-SEED-Vision-Instruct-3B |
`naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B`
| | |
|
`HCXVisionForCausalLM`
| HyperCLOVAX-SEED-Vision-Instruct-3B |
`naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B`
| | |
|
`InternLMForCausalLM`
| InternLM |
`internlm/internlm-7b`
,
`internlm/internlm-chat-7b`
, etc. | ✅︎ | ✅︎ |
|
`InternLMForCausalLM`
| InternLM |
`internlm/internlm-7b`
,
`internlm/internlm-chat-7b`
, etc. | ✅︎ | ✅︎ |
|
`InternLM2ForCausalLM`
| InternLM2 |
`internlm/internlm2-7b`
,
`internlm/internlm2-chat-7b`
, etc. | ✅︎ | ✅︎ |
|
`InternLM2ForCausalLM`
| InternLM2 |
`internlm/internlm2-7b`
,
`internlm/internlm2-chat-7b`
, etc. | ✅︎ | ✅︎ |
...
...
tests/distributed/test_pipeline_parallel.py
View file @
fe2016de
...
@@ -244,7 +244,7 @@ def _compare_tp(
...
@@ -244,7 +244,7 @@ def _compare_tp(
tokenizer_mode
=
model_info
.
tokenizer_mode
tokenizer_mode
=
model_info
.
tokenizer_mode
hf_overrides
=
model_info
.
hf_overrides
hf_overrides
=
model_info
.
hf_overrides
hf_config
=
get_config
(
model_id
,
trust_remote_code
)
hf_config
=
get_config
(
model_id
,
trust_remote_code
)
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
require_embed_inputs
=
model_info
.
require_embed_inputs
max_num_seqs
=
model_info
.
max_num_seqs
max_num_seqs
=
model_info
.
max_num_seqs
dtype
=
"float16"
dtype
=
"float16"
...
@@ -299,8 +299,14 @@ def _compare_tp(
...
@@ -299,8 +299,14 @@ def _compare_tp(
common_args
.
extend
([
"--load-format"
,
load_format
])
common_args
.
extend
([
"--load-format"
,
load_format
])
if
hf_overrides
:
if
hf_overrides
:
common_args
.
extend
([
"--hf-overrides"
,
json
.
dumps
(
hf_overrides
)])
common_args
.
extend
([
"--hf-overrides"
,
json
.
dumps
(
hf_overrides
)])
if
skip_tokenizer_init
:
if
require_embed_inputs
:
common_args
.
append
(
"--skip-tokenizer-init"
)
common_args
.
extend
(
[
"--skip-tokenizer-init"
,
"--enable-prompt-embeds"
,
"--enable-mm-embeds"
,
]
)
if
max_num_seqs
:
if
max_num_seqs
:
common_args
.
extend
([
"--max-num-seqs"
,
f
"
{
max_num_seqs
}
"
])
common_args
.
extend
([
"--max-num-seqs"
,
f
"
{
max_num_seqs
}
"
])
...
...
tests/distributed/test_sequence_parallel.py
View file @
fe2016de
...
@@ -181,7 +181,7 @@ def _compare_sp(
...
@@ -181,7 +181,7 @@ def _compare_sp(
trust_remote_code
=
model_info
.
trust_remote_code
trust_remote_code
=
model_info
.
trust_remote_code
tokenizer_mode
=
model_info
.
tokenizer_mode
tokenizer_mode
=
model_info
.
tokenizer_mode
hf_overrides
=
model_info
.
hf_overrides
hf_overrides
=
model_info
.
hf_overrides
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
require_embed_inputs
=
model_info
.
require_embed_inputs
if
load_format
==
"dummy"
:
if
load_format
==
"dummy"
:
# Avoid OOM
# Avoid OOM
...
@@ -233,8 +233,14 @@ def _compare_sp(
...
@@ -233,8 +233,14 @@ def _compare_sp(
common_args
.
extend
([
"--load-format"
,
load_format
])
common_args
.
extend
([
"--load-format"
,
load_format
])
if
hf_overrides
:
if
hf_overrides
:
common_args
.
extend
([
"--hf-overrides"
,
json
.
dumps
(
hf_overrides
)])
common_args
.
extend
([
"--hf-overrides"
,
json
.
dumps
(
hf_overrides
)])
if
skip_tokenizer_init
:
if
require_embed_inputs
:
common_args
.
append
(
"--skip-tokenizer-init"
)
common_args
.
extend
(
[
"--skip-tokenizer-init"
,
"--enable-prompt-embeds"
,
"--enable-mm-embeds"
,
]
)
compilation_config
=
{
compilation_config
=
{
"mode"
:
CompilationMode
.
VLLM_COMPILE
,
"mode"
:
CompilationMode
.
VLLM_COMPILE
,
...
...
tests/entrypoints/openai/test_chat_template.py
View file @
fe2016de
...
@@ -114,7 +114,9 @@ def test_get_gen_prompt(
...
@@ -114,7 +114,9 @@ def test_get_gen_prompt(
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
...
tests/entrypoints/test_chat_utils.py
View file @
fe2016de
...
@@ -1742,7 +1742,9 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
...
@@ -1742,7 +1742,9 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
@@ -1842,7 +1844,9 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
...
@@ -1842,7 +1844,9 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
@@ -1903,7 +1907,9 @@ def test_resolve_content_format_hf_defined(model, expected_format):
...
@@ -1903,7 +1907,9 @@ def test_resolve_content_format_hf_defined(model, expected_format):
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
@@ -1961,7 +1967,9 @@ def test_resolve_content_format_fallbacks(model, expected_format):
...
@@ -1961,7 +1967,9 @@ def test_resolve_content_format_fallbacks(model, expected_format):
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
...
tests/models/multimodal/generation/vlm_utils/core.py
View file @
fe2016de
...
@@ -71,8 +71,9 @@ def run_test(
...
@@ -71,8 +71,9 @@ def run_test(
vllm_runner_kwargs_
[
"tokenizer_mode"
]
=
model_info
.
tokenizer_mode
vllm_runner_kwargs_
[
"tokenizer_mode"
]
=
model_info
.
tokenizer_mode
if
model_info
.
hf_overrides
:
if
model_info
.
hf_overrides
:
vllm_runner_kwargs_
[
"hf_overrides"
]
=
model_info
.
hf_overrides
vllm_runner_kwargs_
[
"hf_overrides"
]
=
model_info
.
hf_overrides
if
model_info
.
skip_tokenizer_init
:
if
model_info
.
require_embed_inputs
:
vllm_runner_kwargs_
[
"skip_tokenizer_init"
]
=
model_info
.
skip_tokenizer_init
for
k
in
(
"skip_tokenizer_init"
,
"enable_prompt_embeds"
,
"enable_mm_embeds"
):
vllm_runner_kwargs_
[
k
]
=
model_info
.
require_embed_inputs
if
vllm_runner_kwargs
:
if
vllm_runner_kwargs
:
vllm_runner_kwargs_
.
update
(
vllm_runner_kwargs
)
vllm_runner_kwargs_
.
update
(
vllm_runner_kwargs
)
...
...
tests/models/multimodal/processing/test_common.py
View file @
fe2016de
...
@@ -108,7 +108,9 @@ def _test_processing_correctness(
...
@@ -108,7 +108,9 @@ def _test_processing_correctness(
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
# Ensure that the cache can fit all of the data
# Ensure that the cache can fit all of the data
mm_processor_cache_gb
=
2048
,
mm_processor_cache_gb
=
2048
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
...
tests/models/multimodal/processing/test_tensor_schema.py
View file @
fe2016de
...
@@ -218,7 +218,9 @@ def test_model_tensor_schema(model_arch: str, model_id: str):
...
@@ -218,7 +218,9 @@ def test_model_tensor_schema(model_arch: str, model_id: str):
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
hf_overrides_fn
,
hf_overrides
=
hf_overrides_fn
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
...
tests/models/multimodal/test_mapping.py
View file @
fe2016de
...
@@ -59,7 +59,9 @@ def test_hf_model_weights_mapper(model_arch: str):
...
@@ -59,7 +59,9 @@ def test_hf_model_weights_mapper(model_arch: str):
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
)
)
...
...
tests/models/registry.py
View file @
fe2016de
...
@@ -6,7 +6,6 @@ from dataclasses import dataclass, field
...
@@ -6,7 +6,6 @@ from dataclasses import dataclass, field
from
typing
import
Any
,
Literal
from
typing
import
Any
,
Literal
import
pytest
import
pytest
import
torch
from
packaging.version
import
Version
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
...
@@ -33,6 +32,11 @@ class _HfExamplesInfo:
...
@@ -33,6 +32,11 @@ class _HfExamplesInfo:
for speculative decoding.
for speculative decoding.
"""
"""
speculative_method
:
str
|
None
=
None
"""
The method to use for speculative decoding.
"""
min_transformers_version
:
str
|
None
=
None
min_transformers_version
:
str
|
None
=
None
"""
"""
The minimum version of HF Transformers that is required to run this model.
The minimum version of HF Transformers that is required to run this model.
...
@@ -48,9 +52,10 @@ class _HfExamplesInfo:
...
@@ -48,9 +52,10 @@ class _HfExamplesInfo:
The reason for the minimum/maximum version requirement.
The reason for the minimum/maximum version requirement.
"""
"""
skip_tokenizer_init
:
bool
=
False
require_embed_inputs
:
bool
=
False
"""
"""
If true, skip initialization of tokenizer and detokenizer.
If `True`, enables prompt and multi-modal embedding inputs while
disabling tokenization.
"""
"""
dtype
:
ModelDType
=
"auto"
dtype
:
ModelDType
=
"auto"
...
@@ -168,10 +173,7 @@ class _HfExamplesInfo:
...
@@ -168,10 +173,7 @@ class _HfExamplesInfo:
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
# [Decoder-only]
# [Decoder-only]
"ApertusForCausalLM"
:
_HfExamplesInfo
(
"ApertusForCausalLM"
:
_HfExamplesInfo
(
"swiss-ai/Apertus-8B-Instruct-2509"
),
"swiss-ai/Apertus-8B-Instruct-2509"
,
min_transformers_version
=
"4.56.0"
,
),
"AquilaModel"
:
_HfExamplesInfo
(
"BAAI/AquilaChat-7B"
,
trust_remote_code
=
True
),
"AquilaModel"
:
_HfExamplesInfo
(
"BAAI/AquilaChat-7B"
,
trust_remote_code
=
True
),
"AquilaForCausalLM"
:
_HfExamplesInfo
(
"BAAI/AquilaChat2-7B"
,
trust_remote_code
=
True
),
"AquilaForCausalLM"
:
_HfExamplesInfo
(
"BAAI/AquilaChat2-7B"
,
trust_remote_code
=
True
),
"ArceeForCausalLM"
:
_HfExamplesInfo
(
"arcee-ai/AFM-4.5B-Base"
),
"ArceeForCausalLM"
:
_HfExamplesInfo
(
"arcee-ai/AFM-4.5B-Base"
),
...
@@ -192,7 +194,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -192,7 +194,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
),
),
"BambaForCausalLM"
:
_HfExamplesInfo
(
"BambaForCausalLM"
:
_HfExamplesInfo
(
"ibm-ai-platform/Bamba-9B-v1"
,
"ibm-ai-platform/Bamba-9B-v1"
,
min_transformers_version
=
"4.55.3"
,
extras
=
{
"tiny"
:
"hmellor/tiny-random-BambaForCausalLM"
},
extras
=
{
"tiny"
:
"hmellor/tiny-random-BambaForCausalLM"
},
),
),
"BloomForCausalLM"
:
_HfExamplesInfo
(
"BloomForCausalLM"
:
_HfExamplesInfo
(
...
@@ -212,11 +213,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -212,11 +213,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"CohereForAI/c4ai-command-r7b-12-2024"
,
"CohereForAI/c4ai-command-r7b-12-2024"
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
),
),
"CwmForCausalLM"
:
_HfExamplesInfo
(
"CwmForCausalLM"
:
_HfExamplesInfo
(
"facebook/cwm"
,
min_transformers_version
=
"4.58"
),
"facebook/cwm"
,
trust_remote_code
=
True
,
is_available_online
=
False
,
),
"DbrxForCausalLM"
:
_HfExamplesInfo
(
"databricks/dbrx-instruct"
),
"DbrxForCausalLM"
:
_HfExamplesInfo
(
"databricks/dbrx-instruct"
),
"DeciLMForCausalLM"
:
_HfExamplesInfo
(
"DeciLMForCausalLM"
:
_HfExamplesInfo
(
"nvidia/Llama-3_3-Nemotron-Super-49B-v1"
,
"nvidia/Llama-3_3-Nemotron-Super-49B-v1"
,
...
@@ -232,18 +229,12 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -232,18 +229,12 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
trust_remote_code
=
True
,
trust_remote_code
=
True
,
),
),
"DeepseekV32ForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/DeepSeek-V3.2-Exp"
),
"DeepseekV32ForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/DeepSeek-V3.2-Exp"
),
"Ernie4_5ForCausalLM"
:
_HfExamplesInfo
(
"Ernie4_5ForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-0.3B-PT"
),
"baidu/ERNIE-4.5-0.3B-PT"
,
min_transformers_version
=
"4.54"
"Ernie4_5_MoeForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-21B-A3B-PT"
),
),
"Ernie4_5_MoeForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-21B-A3B-PT"
,
min_transformers_version
=
"4.54"
),
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
,
trust_remote_code
=
True
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
,
trust_remote_code
=
True
),
),
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-4.0-32B"
),
"LGAI-EXAONE/EXAONE-4.0-32B"
,
min_transformers_version
=
"4.54"
),
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-0.5B-Base"
),
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-0.5B-Base"
),
...
@@ -251,14 +242,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -251,14 +242,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"GemmaForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-1.1-2b-it"
),
"GemmaForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-1.1-2b-it"
),
"Gemma2ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-2-9b"
),
"Gemma2ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-2-9b"
),
"Gemma3ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-3-1b-it"
),
"Gemma3ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-3-1b-it"
),
"Gemma3nForCausalLM"
:
_HfExamplesInfo
(
"Gemma3nForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-3n-E2B-it"
),
"google/gemma-3n-E2B-it"
,
min_transformers_version
=
"4.53"
),
"GlmForCausalLM"
:
_HfExamplesInfo
(
"zai-org/glm-4-9b-chat-hf"
),
"GlmForCausalLM"
:
_HfExamplesInfo
(
"zai-org/glm-4-9b-chat-hf"
),
"Glm4ForCausalLM"
:
_HfExamplesInfo
(
"zai-org/GLM-4-9B-0414"
),
"Glm4ForCausalLM"
:
_HfExamplesInfo
(
"zai-org/GLM-4-9B-0414"
),
"Glm4MoeForCausalLM"
:
_HfExamplesInfo
(
"Glm4MoeForCausalLM"
:
_HfExamplesInfo
(
"zai-org/GLM-4.5"
),
"zai-org/GLM-4.5"
,
min_transformers_version
=
"4.54"
),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
"openai-community/gpt2"
,
{
"alias"
:
"gpt2"
}),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
"openai-community/gpt2"
,
{
"alias"
:
"gpt2"
}),
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder"
,
"bigcode/starcoder"
,
...
@@ -266,8 +253,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -266,8 +253,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"tiny"
:
"bigcode/tiny_starcoder_py"
,
"tiny"
:
"bigcode/tiny_starcoder_py"
,
"santacoder"
:
"bigcode/gpt_bigcode-santacoder"
,
"santacoder"
:
"bigcode/gpt_bigcode-santacoder"
,
},
},
min_transformers_version
=
"4.55.1"
,
transformers_version_reason
=
"HF model broken in 4.55.0"
,
),
),
"GPTJForCausalLM"
:
_HfExamplesInfo
(
"GPTJForCausalLM"
:
_HfExamplesInfo
(
"Milos/slovak-gpt-j-405M"
,
{
"6b"
:
"EleutherAI/gpt-j-6b"
}
"Milos/slovak-gpt-j-405M"
,
{
"6b"
:
"EleutherAI/gpt-j-6b"
}
...
@@ -279,8 +264,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -279,8 +264,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"GraniteForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerLM-3b"
),
"GraniteForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerLM-3b"
),
"GraniteMoeForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerMoE-3b"
),
"GraniteMoeForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerMoE-3b"
),
"GraniteMoeHybridForCausalLM"
:
_HfExamplesInfo
(
"GraniteMoeHybridForCausalLM"
:
_HfExamplesInfo
(
"ibm-granite/granite-4.0-tiny-preview"
,
"ibm-granite/granite-4.0-tiny-preview"
min_transformers_version
=
"4.55.3"
,
),
),
"GraniteMoeSharedForCausalLM"
:
_HfExamplesInfo
(
"GraniteMoeSharedForCausalLM"
:
_HfExamplesInfo
(
"ibm-research/moe-7b-1b-active-shared-experts"
"ibm-research/moe-7b-1b-active-shared-experts"
...
@@ -288,15 +272,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -288,15 +272,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"Grok1ModelForCausalLM"
:
_HfExamplesInfo
(
"Grok1ModelForCausalLM"
:
_HfExamplesInfo
(
"hpcai-tech/grok-1"
,
trust_remote_code
=
True
"hpcai-tech/grok-1"
,
trust_remote_code
=
True
),
),
"HunYuanDenseV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-7B-Instruct"
),
"HunYuanMoEV1ForCausalLM"
:
_HfExamplesInfo
(
"HunYuanMoEV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-A13B-Instruct"
,
trust_remote_code
=
True
"tencent/Hunyuan-A13B-Instruct"
,
trust_remote_code
=
True
),
),
# TODO: Remove is_available_online once their config.json is fixed
"HunYuanDenseV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-7B-Instruct-0124"
,
trust_remote_code
=
True
,
is_available_online
=
False
,
),
"InternLMForCausalLM"
:
_HfExamplesInfo
(
"InternLMForCausalLM"
:
_HfExamplesInfo
(
"internlm/internlm-chat-7b"
,
trust_remote_code
=
True
"internlm/internlm-chat-7b"
,
trust_remote_code
=
True
),
),
...
@@ -312,15 +291,12 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -312,15 +291,12 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"JAISLMHeadModel"
:
_HfExamplesInfo
(
"inceptionai/jais-13b-chat"
),
"JAISLMHeadModel"
:
_HfExamplesInfo
(
"inceptionai/jais-13b-chat"
),
"JambaForCausalLM"
:
_HfExamplesInfo
(
"JambaForCausalLM"
:
_HfExamplesInfo
(
"ai21labs/AI21-Jamba-1.5-Mini"
,
"ai21labs/AI21-Jamba-1.5-Mini"
,
min_transformers_version
=
"4.55.3"
,
extras
=
{
extras
=
{
"tiny"
:
"ai21labs/Jamba-tiny-dev"
,
"tiny"
:
"ai21labs/Jamba-tiny-dev"
,
"random"
:
"ai21labs/Jamba-tiny-random"
,
"random"
:
"ai21labs/Jamba-tiny-random"
,
},
},
),
),
"Lfm2ForCausalLM"
:
_HfExamplesInfo
(
"Lfm2ForCausalLM"
:
_HfExamplesInfo
(
"LiquidAI/LFM2-1.2B"
),
"LiquidAI/LFM2-1.2B"
,
min_transformers_version
=
"4.54"
),
"Lfm2MoeForCausalLM"
:
_HfExamplesInfo
(
"Lfm2MoeForCausalLM"
:
_HfExamplesInfo
(
"LiquidAI/LFM2-8B-A1B"
,
min_transformers_version
=
"4.58"
"LiquidAI/LFM2-8B-A1B"
,
min_transformers_version
=
"4.58"
),
),
...
@@ -338,7 +314,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -338,7 +314,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
),
),
"Llama4ForCausalLM"
:
_HfExamplesInfo
(
"Llama4ForCausalLM"
:
_HfExamplesInfo
(
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
,
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
,
is_available_online
=
False
,
),
),
"LongcatFlashForCausalLM"
:
_HfExamplesInfo
(
"LongcatFlashForCausalLM"
:
_HfExamplesInfo
(
"meituan-longcat/LongCat-Flash-Chat"
,
trust_remote_code
=
True
"meituan-longcat/LongCat-Flash-Chat"
,
trust_remote_code
=
True
...
@@ -346,7 +321,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -346,7 +321,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"MambaForCausalLM"
:
_HfExamplesInfo
(
"state-spaces/mamba-130m-hf"
),
"MambaForCausalLM"
:
_HfExamplesInfo
(
"state-spaces/mamba-130m-hf"
),
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mamba-Codestral-7B-v0.1"
,
"mistralai/Mamba-Codestral-7B-v0.1"
,
min_transformers_version
=
"4.55.3"
,
extras
=
{
extras
=
{
"random"
:
"yujiepan/mamba2-codestral-v0.1-tiny-random"
,
"random"
:
"yujiepan/mamba2-codestral-v0.1-tiny-random"
,
},
},
...
@@ -421,7 +395,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -421,7 +395,6 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"SeedOssForCausalLM"
:
_HfExamplesInfo
(
"SeedOssForCausalLM"
:
_HfExamplesInfo
(
"ByteDance-Seed/Seed-OSS-36B-Instruct"
,
"ByteDance-Seed/Seed-OSS-36B-Instruct"
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
is_available_online
=
False
,
),
),
"SmolLM3ForCausalLM"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolLM3-3B"
),
"SmolLM3ForCausalLM"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolLM3-3B"
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
...
@@ -488,7 +461,8 @@ _EMBEDDING_EXAMPLE_MODELS = {
...
@@ -488,7 +461,8 @@ _EMBEDDING_EXAMPLE_MODELS = {
"RobertaForMaskedLM"
:
_HfExamplesInfo
(
"sentence-transformers/all-roberta-large-v1"
),
"RobertaForMaskedLM"
:
_HfExamplesInfo
(
"sentence-transformers/all-roberta-large-v1"
),
"XLMRobertaModel"
:
_HfExamplesInfo
(
"intfloat/multilingual-e5-small"
),
"XLMRobertaModel"
:
_HfExamplesInfo
(
"intfloat/multilingual-e5-small"
),
"BertSpladeSparseEmbeddingModel"
:
_HfExamplesInfo
(
"BertSpladeSparseEmbeddingModel"
:
_HfExamplesInfo
(
"naver/splade-v3"
,
is_available_online
=
False
"naver/splade-v3"
,
hf_overrides
=
{
"architectures"
:
[
"BertSpladeSparseEmbeddingModel"
]},
),
),
# [Multimodal]
# [Multimodal]
"CLIPModel"
:
_HfExamplesInfo
(
"openai/clip-vit-base-patch32"
),
"CLIPModel"
:
_HfExamplesInfo
(
"openai/clip-vit-base-patch32"
),
...
@@ -499,18 +473,17 @@ _EMBEDDING_EXAMPLE_MODELS = {
...
@@ -499,18 +473,17 @@ _EMBEDDING_EXAMPLE_MODELS = {
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"MrLight/dse-qwen2-2b-mrl-v1"
),
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"MrLight/dse-qwen2-2b-mrl-v1"
),
"PrithviGeoSpatialMAE"
:
_HfExamplesInfo
(
"PrithviGeoSpatialMAE"
:
_HfExamplesInfo
(
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
dtype
=
torch
.
float16
,
dtype
=
"
float16
"
,
enforce_eager
=
True
,
enforce_eager
=
True
,
skip_tokenizer_init
=
True
,
require_embed_inputs
=
True
,
# This is to avoid the model
# This is to avoid the model going OOM in CI
# going OOM in CI
max_num_seqs
=
32
,
max_num_seqs
=
32
,
),
),
"Terratorch"
:
_HfExamplesInfo
(
"Terratorch"
:
_HfExamplesInfo
(
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
dtype
=
torch
.
float16
,
dtype
=
"
float16
"
,
enforce_eager
=
True
,
enforce_eager
=
True
,
skip_tokenizer_init
=
True
,
require_embed_inputs
=
True
,
# This is to avoid the model going OOM in CI
# This is to avoid the model going OOM in CI
max_num_seqs
=
32
,
max_num_seqs
=
32
,
),
),
...
@@ -598,10 +571,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -598,10 +571,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
),
),
"FuyuForCausalLM"
:
_HfExamplesInfo
(
"adept/fuyu-8b"
),
"FuyuForCausalLM"
:
_HfExamplesInfo
(
"adept/fuyu-8b"
),
"Gemma3ForConditionalGeneration"
:
_HfExamplesInfo
(
"google/gemma-3-4b-it"
),
"Gemma3ForConditionalGeneration"
:
_HfExamplesInfo
(
"google/gemma-3-4b-it"
),
"Gemma3nForConditionalGeneration"
:
_HfExamplesInfo
(
"Gemma3nForConditionalGeneration"
:
_HfExamplesInfo
(
"google/gemma-3n-E2B-it"
),
"google/gemma-3n-E2B-it"
,
min_transformers_version
=
"4.53"
,
),
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"ibm-granite/granite-speech-3.3-2b"
"ibm-granite/granite-speech-3.3-2b"
),
),
...
@@ -611,9 +581,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -611,9 +581,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]},
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]},
),
),
"Glm4vForConditionalGeneration"
:
_HfExamplesInfo
(
"zai-org/GLM-4.1V-9B-Thinking"
),
"Glm4vForConditionalGeneration"
:
_HfExamplesInfo
(
"zai-org/GLM-4.1V-9B-Thinking"
),
"Glm4vMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"Glm4vMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"zai-org/GLM-4.5V"
),
"zai-org/GLM-4.5V"
,
min_transformers_version
=
"4.56"
),
"H2OVLChatModel"
:
_HfExamplesInfo
(
"H2OVLChatModel"
:
_HfExamplesInfo
(
"h2oai/h2ovl-mississippi-800m"
,
"h2oai/h2ovl-mississippi-800m"
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
...
@@ -627,9 +595,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -627,9 +595,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
),
),
"Idefics3ForConditionalGeneration"
:
_HfExamplesInfo
(
"Idefics3ForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceM4/Idefics3-8B-Llama3"
,
"HuggingFaceM4/Idefics3-8B-Llama3"
,
{
"tiny"
:
"HuggingFaceTB/SmolVLM-256M-Instruct"
},
extras
=
{
"tiny"
:
"HuggingFaceTB/SmolVLM-256M-Instruct"
},
min_transformers_version
=
"4.56"
,
transformers_version_reason
=
"HF model broken in 4.55"
,
),
),
"InternS1ForConditionalGeneration"
:
_HfExamplesInfo
(
"InternS1ForConditionalGeneration"
:
_HfExamplesInfo
(
"internlm/Intern-S1"
,
trust_remote_code
=
True
"internlm/Intern-S1"
,
trust_remote_code
=
True
...
@@ -781,13 +747,11 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -781,13 +747,11 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Qwen/Qwen3-VL-4B-Instruct"
,
"Qwen/Qwen3-VL-4B-Instruct"
,
max_model_len
=
4096
,
max_model_len
=
4096
,
min_transformers_version
=
"4.57"
,
min_transformers_version
=
"4.57"
,
is_available_online
=
False
,
),
),
"Qwen3VLMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen3VLMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen3-VL-30B-A3B-Instruct"
,
"Qwen/Qwen3-VL-30B-A3B-Instruct"
,
max_model_len
=
4096
,
max_model_len
=
4096
,
min_transformers_version
=
"4.57"
,
min_transformers_version
=
"4.57"
,
is_available_online
=
False
,
),
),
"Qwen3OmniMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen3OmniMoeForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
,
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
,
...
@@ -799,9 +763,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -799,9 +763,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Skywork/Skywork-R1V-38B"
,
trust_remote_code
=
True
"Skywork/Skywork-R1V-38B"
,
trust_remote_code
=
True
),
),
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
,
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
min_transformers_version
=
"4.56"
,
transformers_version_reason
=
"HF model broken in 4.55"
,
),
),
"Step3VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Step3VLForConditionalGeneration"
:
_HfExamplesInfo
(
"stepfun-ai/step3"
,
trust_remote_code
=
True
"stepfun-ai/step3"
,
trust_remote_code
=
True
...
@@ -817,7 +779,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -817,7 +779,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
),
),
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Voxtral-Mini-3B-2507"
,
"mistralai/Voxtral-Mini-3B-2507"
,
min_transformers_version
=
"4.54"
,
# disable this temporarily until we support HF format
# disable this temporarily until we support HF format
is_available_online
=
False
,
is_available_online
=
False
,
),
),
...
@@ -878,8 +839,8 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
...
@@ -878,8 +839,8 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
"EagleMiniCPMForCausalLM"
:
_HfExamplesInfo
(
"EagleMiniCPMForCausalLM"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-1B-sft-bf16"
,
"openbmb/MiniCPM-1B-sft-bf16"
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
is_available_online
=
False
,
speculative_model
=
"openbmb/MiniCPM-2B-sft-bf16"
,
speculative_model
=
"openbmb/MiniCPM-2B-sft-bf16"
,
speculative_method
=
"eagle"
,
tokenizer
=
"openbmb/MiniCPM-2B-sft-bf16"
,
tokenizer
=
"openbmb/MiniCPM-2B-sft-bf16"
,
),
),
"ErnieMTPModel"
:
_HfExamplesInfo
(
"ErnieMTPModel"
:
_HfExamplesInfo
(
...
@@ -890,8 +851,6 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
...
@@ -890,8 +851,6 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
"Glm4MoeMTPModel"
:
_HfExamplesInfo
(
"Glm4MoeMTPModel"
:
_HfExamplesInfo
(
"zai-org/GLM-4.5"
,
"zai-org/GLM-4.5"
,
speculative_model
=
"zai-org/GLM-4.5"
,
speculative_model
=
"zai-org/GLM-4.5"
,
min_transformers_version
=
"4.56"
,
is_available_online
=
False
,
),
),
"LongCatFlashMTPModel"
:
_HfExamplesInfo
(
"LongCatFlashMTPModel"
:
_HfExamplesInfo
(
"meituan-longcat/LongCat-Flash-Chat"
,
"meituan-longcat/LongCat-Flash-Chat"
,
...
...
tests/models/test_initialization.py
View file @
fe2016de
...
@@ -105,20 +105,19 @@ def can_initialize(
...
@@ -105,20 +105,19 @@ def can_initialize(
if
model_arch
==
"WhisperForConditionalGeneration"
:
if
model_arch
==
"WhisperForConditionalGeneration"
:
m
.
setenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
,
"spawn"
)
m
.
setenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
,
"spawn"
)
extra_args
=
{}
if
model_arch
in
(
"PrithviGeoSpatialMAE"
,
"Terratorch"
):
extra_args
[
"enable_mm_embeds"
]
=
True
LLM
(
LLM
(
model_info
.
default
,
model_info
.
default
,
tokenizer
=
model_info
.
tokenizer
,
tokenizer
=
model_info
.
tokenizer
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
revision
=
model_info
.
revision
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
dtype
=
model_info
.
dtype
,
dtype
=
model_info
.
dtype
,
speculative_config
=
{
speculative_config
=
{
"model"
:
model_info
.
speculative_model
,
"model"
:
model_info
.
speculative_model
,
"method"
:
model_info
.
speculative_method
,
"num_speculative_tokens"
:
1
,
"num_speculative_tokens"
:
1
,
}
}
if
model_info
.
speculative_model
if
model_info
.
speculative_model
...
@@ -133,7 +132,6 @@ def can_initialize(
...
@@ -133,7 +132,6 @@ def can_initialize(
else
"vllm"
,
else
"vllm"
,
hf_overrides
=
hf_overrides_fn
,
hf_overrides
=
hf_overrides_fn
,
max_num_seqs
=
model_info
.
max_num_seqs
,
max_num_seqs
=
model_info
.
max_num_seqs
,
**
extra_args
,
)
)
...
...
tests/models/utils.py
View file @
fe2016de
...
@@ -309,7 +309,9 @@ def build_model_context(
...
@@ -309,7 +309,9 @@ def build_model_context(
limit_mm_per_prompt
=
limit_mm_per_prompt
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
hf_overrides
=
model_info
.
hf_overrides
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
skip_tokenizer_init
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
enforce_eager
=
model_info
.
enforce_eager
,
**
model_config_kwargs
,
**
model_config_kwargs
,
)
)
...
...
vllm/model_executor/models/glm4_1v.py
View file @
fe2016de
...
@@ -36,9 +36,7 @@ import torch
...
@@ -36,9 +36,7 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
einops
import
rearrange
from
einops
import
rearrange
from
packaging.version
import
Version
from
transformers
import
BatchFeature
from
transformers
import
BatchFeature
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
transformers.models.glm4v.configuration_glm4v
import
Glm4vVisionConfig
from
transformers.models.glm4v.configuration_glm4v
import
Glm4vVisionConfig
from
transformers.models.glm4v.image_processing_glm4v
import
(
from
transformers.models.glm4v.image_processing_glm4v
import
(
Glm4vImageProcessor
,
Glm4vImageProcessor
,
...
@@ -1270,14 +1268,7 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
...
@@ -1270,14 +1268,7 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
video_mm_data
=
dict
()
video_mm_data
=
dict
()
video_mm_data
[
"videos"
]
=
[[
video_array
]]
video_mm_data
[
"videos"
]
=
[[
video_array
]]
# backward compatibility for Transformers 4.55
unuse_metadata
=
[
"do_sample_frames"
]
unuse_metadata
=
[
"do_sample_frames"
]
if
(
not
hasattr
(
VideoMetadata
,
"frames_indices"
)
and
"frames_indices"
in
metadata
):
unuse_metadata
.
append
(
"frames_indices"
)
video_mm_data
[
"video_metadata"
]
=
[
video_mm_data
[
"video_metadata"
]
=
[
[
[
VideoMetadata
(
VideoMetadata
(
...
@@ -1296,24 +1287,11 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
...
@@ -1296,24 +1287,11 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
mm_kwargs
=
video_mm_kwargs
,
mm_kwargs
=
video_mm_kwargs
,
tok_kwargs
=
tok_kwargs
,
tok_kwargs
=
tok_kwargs
,
)
)
if
not
video_mm_kwargs
[
"do_sample_frames"
]
and
Version
(
input_ids
=
video_outputs
.
pop
(
"input_ids"
)
TRANSFORMERS_VERSION
input_ids
[
input_ids
==
processor
.
image_token_id
]
=
(
)
<
Version
(
"4.56.0"
):
processor
.
video_token_id
# Transformers v4.55 has incorrect timestamps issue for
)
# skip sampling. We construct the placeholder manually to
video_placeholder
=
processor
.
tokenizer
.
batch_decode
(
input_ids
)[
0
]
# get placeholders with correct timestamps.
placeholder
=
self
.
info
.
_construct_video_placeholder
(
video_array
,
metadata
,
video_outputs
[
"video_grid_thw"
].
squeeze
(
0
),
)
video_placeholder
=
processor
.
tokenizer
.
decode
(
placeholder
)
else
:
input_ids
=
video_outputs
.
pop
(
"input_ids"
)
input_ids
[
input_ids
==
processor
.
image_token_id
]
=
(
processor
.
video_token_id
)
video_placeholder
=
processor
.
tokenizer
.
batch_decode
(
input_ids
)[
0
]
prompt
=
prompt
.
replace
(
prompt
=
prompt
.
replace
(
"<|begin_of_video|><|video|><|end_of_video|>"
,
"<|begin_of_video|><|video|><|end_of_video|>"
,
video_placeholder
,
video_placeholder
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment