Commit 2799735a authored by zhuwenwen
Browse files

Merge branch 'v0.15.1-dev-wm' into 'v0.15.1-dev'

[feat]支持glm4_moe_mtp使用torch compile,实现mtp cudagraph模式

See merge request dcutoolkit/deeplearing/vllm!436
parents 838b4509 dc027d74
......@@ -49,6 +49,7 @@ from .glm4_moe import (
get_spec_layer_idx_from_weight_name,
)
from .utils import maybe_prefix
from vllm.compilation.decorators import support_torch_compile
class SharedHead(nn.Module):
......@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module):
return logits
@support_torch_compile
class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment