Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
afe3ea1e
Commit
afe3ea1e
authored
Mar 02, 2026
by
zhuwenwen
Browse files
[Model] GLM adaptation
parent
2544deb6
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
13 additions
and
3 deletions
+13
-3
benchmarks/kernels/benchmark_moe.py
benchmarks/kernels/benchmark_moe.py
+1
-0
tests/models/registry.py
tests/models/registry.py
+3
-0
tests/models/test_initialization.py
tests/models/test_initialization.py
+1
-1
vllm/config/speculative.py
vllm/config/speculative.py
+1
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+5
-1
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
vllm/transformers_utils/model_arch_config_convertor.py
vllm/transformers_utils/model_arch_config_convertor.py
+1
-0
No files found.
benchmarks/kernels/benchmark_moe.py
View file @
afe3ea1e
...
@@ -769,6 +769,7 @@ def main(args: argparse.Namespace):
...
@@ -769,6 +769,7 @@ def main(args: argparse.Namespace):
"DeepseekV2ForCausalLM"
,
"DeepseekV2ForCausalLM"
,
"DeepseekV3ForCausalLM"
,
"DeepseekV3ForCausalLM"
,
"DeepseekV32ForCausalLM"
,
"DeepseekV32ForCausalLM"
,
"GlmMoeDsaForCausalLM"
,
"Glm4MoeForCausalLM"
,
"Glm4MoeForCausalLM"
,
"Glm4MoeLiteForCausalLM"
,
"Glm4MoeLiteForCausalLM"
,
"NemotronHForCausalLM"
,
"NemotronHForCausalLM"
,
...
...
tests/models/registry.py
View file @
afe3ea1e
...
@@ -279,6 +279,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -279,6 +279,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
os
.
path
.
join
(
models_path_prefix
,
"zai-org/GLM-4.7-Flash"
),
os
.
path
.
join
(
models_path_prefix
,
"zai-org/GLM-4.7-Flash"
),
min_transformers_version
=
"5.0.0"
,
min_transformers_version
=
"5.0.0"
,
),
),
"GlmMoeDsaForCausalLM"
:
_HfExamplesInfo
(
"zai-org/GLM-5"
,
min_transformers_version
=
"5.0.1"
,
is_available_online
=
False
),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openai-community/gpt2"
),
{
"alias"
:
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
)}),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openai-community/gpt2"
),
{
"alias"
:
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
)}),
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder"
),
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder"
),
...
...
tests/models/test_initialization.py
View file @
afe3ea1e
...
@@ -97,7 +97,7 @@ def can_initialize(
...
@@ -97,7 +97,7 @@ def can_initialize(
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
)
)
if
model_arch
==
"DeepseekV32ForCausalLM"
:
if
model_arch
in
[
"DeepseekV32ForCausalLM"
,
"GlmMoeDsaForCausalLM"
]
:
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
capability
=
current_platform
.
get_device_capability
()
capability
=
current_platform
.
get_device_capability
()
...
...
vllm/config/speculative.py
View file @
afe3ea1e
...
@@ -176,7 +176,7 @@ class SpeculativeConfig:
...
@@ -176,7 +176,7 @@ class SpeculativeConfig:
@
staticmethod
@
staticmethod
def
hf_config_override
(
hf_config
:
PretrainedConfig
)
->
PretrainedConfig
:
def
hf_config_override
(
hf_config
:
PretrainedConfig
)
->
PretrainedConfig
:
initial_architecture
=
hf_config
.
architectures
[
0
]
initial_architecture
=
hf_config
.
architectures
[
0
]
if
hf_config
.
model_type
in
(
"deepseek_v3"
,
"deepseek_v32"
):
if
hf_config
.
model_type
in
(
"deepseek_v3"
,
"deepseek_v32"
,
"glm_moe_dsa"
):
hf_config
.
model_type
=
"deepseek_mtp"
hf_config
.
model_type
=
"deepseek_mtp"
if
hf_config
.
model_type
==
"deepseek_mtp"
:
if
hf_config
.
model_type
==
"deepseek_mtp"
:
n_predict
=
getattr
(
hf_config
,
"num_nextn_predict_layers"
,
None
)
n_predict
=
getattr
(
hf_config
,
"num_nextn_predict_layers"
,
None
)
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
afe3ea1e
...
@@ -848,7 +848,7 @@ class DeepseekV2MLAAttention(nn.Module):
...
@@ -848,7 +848,7 @@ class DeepseekV2MLAAttention(nn.Module):
qk_rope_head_dim
,
qk_rope_head_dim
,
max_position
=
max_position_embeddings
,
max_position
=
max_position_embeddings
,
rope_parameters
=
config
.
rope_parameters
,
rope_parameters
=
config
.
rope_parameters
,
is_neox_style
=
True
,
is_neox_style
=
not
getattr
(
config
,
"indexer_rope_interleave"
,
True
)
,
)
)
self
.
indexer
=
Indexer
(
self
.
indexer
=
Indexer
(
vllm_config
,
vllm_config
,
...
@@ -1557,6 +1557,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
...
@@ -1557,6 +1557,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass
pass
class
GlmMoeDsaForCausalLM
(
DeepseekV2ForCausalLM
):
pass
# Compatibility with
# Compatibility with
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
def
get_spec_layer_idx_from_weight_name
(
def
get_spec_layer_idx_from_weight_name
(
...
...
vllm/model_executor/models/registry.py
View file @
afe3ea1e
...
@@ -113,6 +113,7 @@ _TEXT_GENERATION_MODELS = {
...
@@ -113,6 +113,7 @@ _TEXT_GENERATION_MODELS = {
"Glm4ForCausalLM"
:
(
"glm4"
,
"Glm4ForCausalLM"
),
"Glm4ForCausalLM"
:
(
"glm4"
,
"Glm4ForCausalLM"
),
"Glm4MoeForCausalLM"
:
(
"glm4_moe"
,
"Glm4MoeForCausalLM"
),
"Glm4MoeForCausalLM"
:
(
"glm4_moe"
,
"Glm4MoeForCausalLM"
),
"Glm4MoeLiteForCausalLM"
:
(
"glm4_moe_lite"
,
"Glm4MoeLiteForCausalLM"
),
"Glm4MoeLiteForCausalLM"
:
(
"glm4_moe_lite"
,
"Glm4MoeLiteForCausalLM"
),
"GlmMoeDsaForCausalLM"
:
(
"deepseek_v2"
,
"GlmMoeDsaForCausalLM"
),
"GptOssForCausalLM"
:
(
"gpt_oss"
,
"GptOssForCausalLM"
),
"GptOssForCausalLM"
:
(
"gpt_oss"
,
"GptOssForCausalLM"
),
"GPT2LMHeadModel"
:
(
"gpt2"
,
"GPT2LMHeadModel"
),
"GPT2LMHeadModel"
:
(
"gpt2"
,
"GPT2LMHeadModel"
),
"GPTBigCodeForCausalLM"
:
(
"gpt_bigcode"
,
"GPTBigCodeForCausalLM"
),
"GPTBigCodeForCausalLM"
:
(
"gpt_bigcode"
,
"GPTBigCodeForCausalLM"
),
...
...
vllm/transformers_utils/model_arch_config_convertor.py
View file @
afe3ea1e
...
@@ -189,6 +189,7 @@ class ModelArchConfigConvertorBase:
...
@@ -189,6 +189,7 @@ class ModelArchConfigConvertorBase:
"deepseek_v3"
,
"deepseek_v3"
,
"deepseek_v32"
,
"deepseek_v32"
,
"deepseek_mtp"
,
"deepseek_mtp"
,
"glm_moe_dsa"
,
"glm4_moe_lite"
,
"glm4_moe_lite"
,
"glm4_moe_lite_mtp"
,
"glm4_moe_lite_mtp"
,
"kimi_k2"
,
"kimi_k2"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment