"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "8f4bd3b47963e05304174e616cc133c8a401bd04"
Commit dc027d74 authored by 王敏's avatar 王敏
Browse files

[feat]支持glm4_moe_mtp使用torch compile,实现mtp cudagraph模式

parent 838b4509
...@@ -49,6 +49,7 @@ from .glm4_moe import ( ...@@ -49,6 +49,7 @@ from .glm4_moe import (
get_spec_layer_idx_from_weight_name, get_spec_layer_idx_from_weight_name,
) )
from .utils import maybe_prefix from .utils import maybe_prefix
from vllm.compilation.decorators import support_torch_compile
class SharedHead(nn.Module): class SharedHead(nn.Module):
...@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module): ...@@ -184,6 +185,7 @@ class Glm4MoeMultiTokenPredictor(nn.Module):
return logits return logits
@support_torch_compile
class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts): class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment