Commit dc027d74 authored by 王敏
Browse files

[feat]支持glm4_moe_mtp使用torch compile,实现mtp cudagraph模式

parent 838b4509
......@@ -49,6 +49,7 @@ from .glm4_moe import (
get_spec_layer_idx_from_weight_name,
)
from .utils import maybe_prefix
from vllm.compilation.decorators import support_torch_compile
class SharedHead(nn.Module):
......@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module):
return logits
@support_torch_compile
class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment