[feat] 支持 glm4_moe_mtp 使用 torch.compile，实现 MTP CUDA graph 模式

dc027d74 · 王敏 · 838b4509 · dc027d74
Commit dc027d74 authored Feb 13, 2026 by 王敏
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 0 deletions

vllm/model_executor/models/glm4_moe_mtp.py vllm/model_executor/models/glm4_moe_mtp.py +2 -0

No files found.
--- a/vllm/model_executor/models/glm4_moe_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_mtp.py
@@ -49,6 +49,7 @@ from .glm4_moe import (
    get_spec_layer_idx_from_weight_name,
 )
 from .utils import maybe_prefix
+from vllm.compilation.decorators import support_torch_compile


 class SharedHead(nn.Module):
@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module):
        return logits


+@support_torch_compile
 class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()