Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
65552b47
Unverified
Commit
65552b47
authored
Aug 09, 2025
by
Cyrus Leung
Committed by
GitHub
Aug 08, 2025
Browse files
[Misc] Use config definitions from Transformers library (#21913)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent
7ad7adb6
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing 11 changed files with 54 additions and 51 deletions
+54
-51
vllm/model_executor/models/aimv2.py
vllm/model_executor/models/aimv2.py
+11
-11
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+4
-4
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+7
-7
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+9
-6
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+4
-4
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/exaone4.py
+3
-3
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe.py
+5
-5
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_text_01.py
+3
-3
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/olmoe.py
+2
-2
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+3
-3
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+3
-3
No files found.
vllm/model_executor/models/aimv2.py
View file @
65552b47
...
...
@@ -8,7 +8,6 @@ from typing import Optional
import
torch
import
torch.nn
as
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
MultiHeadAttention
from
vllm.distributed
import
get_tensor_model_parallel_world_size
...
...
@@ -21,12 +20,13 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.transformers_utils.configs.ovis
import
AIMv2Config
class
AIMv2SwiGLUFFN
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
prefix
:
str
):
super
().
__init__
()
hidden_features
=
config
.
intermediate_size
in_features
=
config
.
hidden_size
...
...
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
class
AIMv2PatchEmbed
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
):
def
__init__
(
self
,
config
:
AIMv2
Config
):
super
().
__init__
()
self
.
proj
=
nn
.
Conv2d
(
config
.
num_channels
,
...
...
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
class
AIMv2ViTPreprocessor
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
):
def
__init__
(
self
,
config
:
AIMv2
Config
):
super
().
__init__
()
num_patches
=
(
config
.
image_size
//
config
.
patch_size
)
**
2
...
...
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
class
AIMv2Attention
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
prefix
:
str
):
super
().
__init__
()
self
.
config
=
config
self
.
embed_dim
=
config
.
hidden_size
...
...
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
class
AIMv2Block
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
prefix
:
str
):
super
().
__init__
()
self
.
attn
=
AIMv2Attention
(
config
,
quant_config
=
quant_config
,
...
...
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
AIMv2
Config
,
quant_config
:
QuantizationConfig
,
*
,
require_post_norm
:
Optional
[
bool
]
=
None
,
...
...
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
class
AIMv2Model
(
torch
.
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
AIMv2
Config
,
quant_config
:
QuantizationConfig
,
*
,
require_post_norm
:
Optional
[
bool
]
=
None
,
...
...
vllm/model_executor/models/commandr.py
View file @
65552b47
...
...
@@ -27,7 +27,7 @@ from typing import Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
CohereConfig
from
transformers
import
Cohere2Config
,
CohereConfig
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -89,7 +89,7 @@ class CohereMLP(nn.Module):
def
__init__
(
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
):
...
...
@@ -124,7 +124,7 @@ class CohereAttention(nn.Module):
def
__init__
(
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -242,7 +242,7 @@ class CohereAttention(nn.Module):
class
CohereDecoderLayer
(
nn
.
Module
):
def
__init__
(
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
):
...
...
vllm/model_executor/models/dbrx.py
View file @
65552b47
...
...
@@ -6,7 +6,7 @@ from typing import Optional, Union
import
torch
import
torch.nn
as
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Dbrx
Config
from
vllm.attention
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
...
...
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
):
super
().
__init__
()
...
...
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
65552b47
...
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
DeepseekV2Config
,
DeepseekV3
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
...
...
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
hidden_size
:
int
,
num_heads
:
int
,
qk_nope_head_dim
:
int
,
...
...
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
hidden_size
:
int
,
num_heads
:
int
,
qk_nope_head_dim
:
int
,
...
...
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
prefix
:
str
,
model_config
:
ModelConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
...
...
@@ -973,7 +973,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass
def
get_spec_layer_idx_from_weight_name
(
config
:
PretrainedConfig
,
# Compatibility with
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
def
get_spec_layer_idx_from_weight_name
(
config
:
Union
[
DeepseekV2Config
,
DeepseekV3Config
],
weight_name
:
str
)
->
Optional
[
int
]:
if
(
hasattr
(
config
,
"num_nextn_predict_layers"
)
and
config
.
num_nextn_predict_layers
>
0
):
...
...
vllm/model_executor/models/dots1.py
View file @
65552b47
...
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Dots1
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
):
...
...
@@ -174,7 +174,7 @@ class Dots1Attention(nn.Module):
hidden_size
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
rope_theta
:
float
=
10000
,
rope_scaling
:
Optional
[
dict
[
str
,
Any
]]
=
None
,
max_position_embeddings
:
int
=
8192
,
...
...
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
prefix
:
str
,
model_config
:
ModelConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
...
...
vllm/model_executor/models/exaone4.py
View file @
65552b47
...
...
@@ -26,7 +26,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
Exaone4Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
def
__init__
(
self
,
config
:
PretrainedConfig
,
config
:
Exaone4Config
,
hidden_size
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
...
...
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
PretrainedConfig
,
config
:
Exaone4Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/glm4_moe.py
View file @
65552b47
...
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers.models.glm4_moe
import
Glm4Moe
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -100,7 +100,7 @@ class Glm4MoE(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
...
...
@@ -198,7 +198,7 @@ class Glm4MoeAttention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
hidden_size
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
...
...
@@ -297,7 +297,7 @@ class Glm4MoeDecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -681,7 +681,7 @@ class Glm4MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
return
self
.
model
.
get_expert_mapping
()
def
get_spec_layer_idx_from_weight_name
(
config
:
Pretrained
Config
,
def
get_spec_layer_idx_from_weight_name
(
config
:
Glm4Moe
Config
,
weight_name
:
str
)
->
Optional
[
int
]:
if
hasattr
(
config
,
"num_nextn_predict_layers"
)
and
(
config
.
num_nextn_predict_layers
...
...
vllm/model_executor/models/minimax_text_01.py
View file @
65552b47
...
...
@@ -12,7 +12,7 @@ import torch.distributed
import
torch.nn.functional
as
F
from
einops
import
rearrange
from
torch
import
nn
from
transformers.configuration_utils
import
Pretrained
Config
from
transformers
import
MiniMax
Config
from
vllm
import
envs
from
vllm.attention
import
Attention
,
AttentionMetadata
...
...
@@ -656,7 +656,7 @@ class MiniMaxText01DecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
MiniMax
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
expert_num
:
int
=
1
,
...
...
@@ -860,7 +860,7 @@ class MiniMaxText01Model(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
MiniMax
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
scheduler_config
=
None
,
...
...
vllm/model_executor/models/olmoe.py
View file @
65552b47
...
...
@@ -19,7 +19,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Olmoe
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Olmoe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
65552b47
...
...
@@ -30,7 +30,7 @@ from typing import Any, Optional, Union
import
torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Qwen2Moe
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -98,7 +98,7 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Qwen2Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
):
...
...
@@ -256,7 +256,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Qwen2Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
65552b47
...
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
import
torch
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Qwen3Moe
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -101,7 +101,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Qwen3Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
...
...
@@ -278,7 +278,7 @@ class Qwen3MoeDecoderLayer(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Qwen3Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment