Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
65552b47
Unverified
Commit
65552b47
authored
Aug 09, 2025
by
Cyrus Leung
Committed by
GitHub
Aug 08, 2025
Browse files
[Misc] Use config definitions from Transformers library (#21913)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
7ad7adb6
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
54 additions
and
51 deletions
+54
-51
vllm/model_executor/models/aimv2.py
vllm/model_executor/models/aimv2.py
+11
-11
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+4
-4
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+7
-7
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+9
-6
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+4
-4
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/exaone4.py
+3
-3
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe.py
+5
-5
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_text_01.py
+3
-3
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/olmoe.py
+2
-2
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+3
-3
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+3
-3
No files found.
vllm/model_executor/models/aimv2.py
View file @
65552b47
...
@@ -8,7 +8,6 @@ from typing import Optional
...
@@ -8,7 +8,6 @@ from typing import Optional
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
MultiHeadAttention
from
vllm.attention.layer
import
MultiHeadAttention
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
...
@@ -21,12 +20,13 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
...
@@ -21,12 +20,13 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
QuantizationConfig
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.transformers_utils.configs.ovis
import
AIMv2Config
class
AIMv2SwiGLUFFN
(
nn
.
Module
):
class
AIMv2SwiGLUFFN
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
prefix
:
str
):
super
().
__init__
()
super
().
__init__
()
hidden_features
=
config
.
intermediate_size
hidden_features
=
config
.
intermediate_size
in_features
=
config
.
hidden_size
in_features
=
config
.
hidden_size
...
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
...
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
class
AIMv2PatchEmbed
(
nn
.
Module
):
class
AIMv2PatchEmbed
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
):
def
__init__
(
self
,
config
:
AIMv2
Config
):
super
().
__init__
()
super
().
__init__
()
self
.
proj
=
nn
.
Conv2d
(
self
.
proj
=
nn
.
Conv2d
(
config
.
num_channels
,
config
.
num_channels
,
...
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
...
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
class
AIMv2ViTPreprocessor
(
nn
.
Module
):
class
AIMv2ViTPreprocessor
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
):
def
__init__
(
self
,
config
:
AIMv2
Config
):
super
().
__init__
()
super
().
__init__
()
num_patches
=
(
config
.
image_size
//
config
.
patch_size
)
**
2
num_patches
=
(
config
.
image_size
//
config
.
patch_size
)
**
2
...
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
...
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
class
AIMv2Attention
(
nn
.
Module
):
class
AIMv2Attention
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
prefix
:
str
):
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
self
.
embed_dim
=
config
.
hidden_size
self
.
embed_dim
=
config
.
hidden_size
...
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
...
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
class
AIMv2Block
(
nn
.
Module
):
class
AIMv2Block
(
nn
.
Module
):
def
__init__
(
self
,
config
:
Pretrained
Config
,
def
__init__
(
self
,
config
:
AIMv2Config
,
quant_config
:
Quantization
Config
,
quant_config
:
QuantizationConfig
,
prefix
:
str
):
prefix
:
str
):
super
().
__init__
()
super
().
__init__
()
self
.
attn
=
AIMv2Attention
(
config
,
self
.
attn
=
AIMv2Attention
(
config
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
...
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
...
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
AIMv2
Config
,
quant_config
:
QuantizationConfig
,
quant_config
:
QuantizationConfig
,
*
,
*
,
require_post_norm
:
Optional
[
bool
]
=
None
,
require_post_norm
:
Optional
[
bool
]
=
None
,
...
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
...
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
class
AIMv2Model
(
torch
.
nn
.
Module
):
class
AIMv2Model
(
torch
.
nn
.
Module
):
def
__init__
(
self
,
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
AIMv2
Config
,
quant_config
:
QuantizationConfig
,
quant_config
:
QuantizationConfig
,
*
,
*
,
require_post_norm
:
Optional
[
bool
]
=
None
,
require_post_norm
:
Optional
[
bool
]
=
None
,
...
...
vllm/model_executor/models/commandr.py
View file @
65552b47
...
@@ -27,7 +27,7 @@ from typing import Optional, Union
...
@@ -27,7 +27,7 @@ from typing import Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
CohereConfig
from
transformers
import
Cohere2Config
,
CohereConfig
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -89,7 +89,7 @@ class CohereMLP(nn.Module):
...
@@ -89,7 +89,7 @@ class CohereMLP(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
):
):
...
@@ -124,7 +124,7 @@ class CohereAttention(nn.Module):
...
@@ -124,7 +124,7 @@ class CohereAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -242,7 +242,7 @@ class CohereAttention(nn.Module):
...
@@ -242,7 +242,7 @@ class CohereAttention(nn.Module):
class
CohereDecoderLayer
(
nn
.
Module
):
class
CohereDecoderLayer
(
nn
.
Module
):
def
__init__
(
self
,
def
__init__
(
self
,
config
:
CohereConfig
,
config
:
Union
[
CohereConfig
,
Cohere2Config
],
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
):
prefix
:
str
=
""
):
...
...
vllm/model_executor/models/dbrx.py
View file @
65552b47
...
@@ -6,7 +6,7 @@ from typing import Optional, Union
...
@@ -6,7 +6,7 @@ from typing import Optional, Union
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Dbrx
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
...
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
...
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
):
):
super
().
__init__
()
super
().
__init__
()
...
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
...
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
...
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
...
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
...
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
...
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dbrx
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
65552b47
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
DeepseekV2Config
,
DeepseekV3
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):
...
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
enable_eplb
:
bool
=
False
,
...
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):
...
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
hidden_size
:
int
,
hidden_size
:
int
,
num_heads
:
int
,
num_heads
:
int
,
qk_nope_head_dim
:
int
,
qk_nope_head_dim
:
int
,
...
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):
...
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
hidden_size
:
int
,
hidden_size
:
int
,
num_heads
:
int
,
num_heads
:
int
,
qk_nope_head_dim
:
int
,
qk_nope_head_dim
:
int
,
...
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):
...
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Union
[
DeepseekV2Config
,
DeepseekV3
Config
]
,
prefix
:
str
,
prefix
:
str
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
...
@@ -973,7 +973,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
...
@@ -973,7 +973,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass
pass
def
get_spec_layer_idx_from_weight_name
(
config
:
PretrainedConfig
,
# Compatibility with
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
def
get_spec_layer_idx_from_weight_name
(
config
:
Union
[
DeepseekV2Config
,
DeepseekV3Config
],
weight_name
:
str
)
->
Optional
[
int
]:
weight_name
:
str
)
->
Optional
[
int
]:
if
(
hasattr
(
config
,
"num_nextn_predict_layers"
)
if
(
hasattr
(
config
,
"num_nextn_predict_layers"
)
and
config
.
num_nextn_predict_layers
>
0
):
and
config
.
num_nextn_predict_layers
>
0
):
...
...
vllm/model_executor/models/dots1.py
View file @
65552b47
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
...
@@ -29,7 +29,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Dots1
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):
...
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
):
):
...
@@ -174,7 +174,7 @@ class Dots1Attention(nn.Module):
...
@@ -174,7 +174,7 @@ class Dots1Attention(nn.Module):
hidden_size
:
int
,
hidden_size
:
int
,
num_heads
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
num_kv_heads
:
int
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
rope_theta
:
float
=
10000
,
rope_theta
:
float
=
10000
,
rope_scaling
:
Optional
[
dict
[
str
,
Any
]]
=
None
,
rope_scaling
:
Optional
[
dict
[
str
,
Any
]]
=
None
,
max_position_embeddings
:
int
=
8192
,
max_position_embeddings
:
int
=
8192
,
...
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):
...
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Dots1
Config
,
prefix
:
str
,
prefix
:
str
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
...
...
vllm/model_executor/models/exaone4.py
View file @
65552b47
...
@@ -26,7 +26,7 @@ from typing import Any, Optional, Union
...
@@ -26,7 +26,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrai
ne
d
Config
from
transformers
import
Exao
ne
4
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
...
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrai
ne
d
Config
,
config
:
Exao
ne
4
Config
,
hidden_size
:
int
,
hidden_size
:
int
,
num_heads
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
num_kv_heads
:
int
,
...
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
...
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrai
ne
d
Config
,
config
:
Exao
ne
4
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/glm4_moe.py
View file @
65552b47
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
.models.glm4_moe
import
Glm4Moe
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -100,7 +100,7 @@ class Glm4MoE(nn.Module):
...
@@ -100,7 +100,7 @@ class Glm4MoE(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
enable_eplb
:
bool
=
False
,
...
@@ -198,7 +198,7 @@ class Glm4MoeAttention(nn.Module):
...
@@ -198,7 +198,7 @@ class Glm4MoeAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
hidden_size
:
int
,
hidden_size
:
int
,
num_heads
:
int
,
num_heads
:
int
,
num_kv_heads
:
int
,
num_kv_heads
:
int
,
...
@@ -297,7 +297,7 @@ class Glm4MoeDecoderLayer(nn.Module):
...
@@ -297,7 +297,7 @@ class Glm4MoeDecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Glm4Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -681,7 +681,7 @@ class Glm4MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -681,7 +681,7 @@ class Glm4MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
return
self
.
model
.
get_expert_mapping
()
return
self
.
model
.
get_expert_mapping
()
def
get_spec_layer_idx_from_weight_name
(
config
:
Pretrained
Config
,
def
get_spec_layer_idx_from_weight_name
(
config
:
Glm4Moe
Config
,
weight_name
:
str
)
->
Optional
[
int
]:
weight_name
:
str
)
->
Optional
[
int
]:
if
hasattr
(
config
,
if
hasattr
(
config
,
"num_nextn_predict_layers"
)
and
(
config
.
num_nextn_predict_layers
"num_nextn_predict_layers"
)
and
(
config
.
num_nextn_predict_layers
...
...
vllm/model_executor/models/minimax_text_01.py
View file @
65552b47
...
@@ -12,7 +12,7 @@ import torch.distributed
...
@@ -12,7 +12,7 @@ import torch.distributed
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
einops
import
rearrange
from
einops
import
rearrange
from
torch
import
nn
from
torch
import
nn
from
transformers
.configuration_utils
import
Pretrained
Config
from
transformers
import
MiniMax
Config
from
vllm
import
envs
from
vllm
import
envs
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.attention
import
Attention
,
AttentionMetadata
...
@@ -656,7 +656,7 @@ class MiniMaxText01DecoderLayer(nn.Module):
...
@@ -656,7 +656,7 @@ class MiniMaxText01DecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
MiniMax
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
expert_num
:
int
=
1
,
expert_num
:
int
=
1
,
...
@@ -860,7 +860,7 @@ class MiniMaxText01Model(nn.Module):
...
@@ -860,7 +860,7 @@ class MiniMaxText01Model(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
MiniMax
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
scheduler_config
=
None
,
scheduler_config
=
None
,
...
...
vllm/model_executor/models/olmoe.py
View file @
65552b47
...
@@ -19,7 +19,7 @@ from typing import Any, Optional, Union
...
@@ -19,7 +19,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Olmoe
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):
...
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Olmoe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
65552b47
...
@@ -30,7 +30,7 @@ from typing import Any, Optional, Union
...
@@ -30,7 +30,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Qwen2Moe
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -98,7 +98,7 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
...
@@ -98,7 +98,7 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Qwen2Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
):
):
...
@@ -256,7 +256,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
...
@@ -256,7 +256,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Qwen2Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
65552b47
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
...
@@ -28,7 +28,7 @@ from typing import Any, Optional, Union
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Qwen3Moe
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -101,7 +101,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
...
@@ -101,7 +101,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Qwen3Moe
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
enable_eplb
:
bool
=
False
,
enable_eplb
:
bool
=
False
,
...
@@ -278,7 +278,7 @@ class Qwen3MoeDecoderLayer(nn.Module):
...
@@ -278,7 +278,7 @@ class Qwen3MoeDecoderLayer(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Qwen3Moe
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment