Merge branch 'v0.15.1-dev-wm' into 'v0.15.1-dev'

[feat] 支持 glm4_moe_mtp 使用 torch compile，实现 mtp cudagraph 模式

See merge request dcutoolkit/deeplearing/vllm!436

Merge branch 'v0.15.1-dev-wm' into 'v0.15.1-dev'
[feat] 支持 glm4_moe_mtp 使用 torch compile，实现 mtp cudagraph 模式

See merge request dcutoolkit/deeplearing/vllm!436
2799735a · zhuwenwen · 838b4509 · dc027d74 · 2799735a
Commit 2799735a authored Feb 14, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 0 deletions

vllm/model_executor/models/glm4_moe_mtp.py vllm/model_executor/models/glm4_moe_mtp.py +2 -0

No files found.
--- a/vllm/model_executor/models/glm4_moe_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_mtp.py
@@ -49,6 +49,7 @@ from .glm4_moe import (
    get_spec_layer_idx_from_weight_name,
 )
 from .utils import maybe_prefix
+from vllm.compilation.decorators import support_torch_compile


 class SharedHead(nn.Module):
@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module):
        return logits


+@support_torch_compile
 class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()