Unverified commit a4b0d5c9, authored by Yuxuan Zhang, committed by GitHub
Browse files

Support both GLM-4.5 and GLM-4.5-Air (#8804)

parent 40e3b2be
...@@ -785,7 +785,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM): ...@@ -785,7 +785,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
) )
def determine_num_fused_shared_experts( def determine_num_fused_shared_experts(
self, architecture: str = "DeepseekV3ForCausalLM" self, architecture: str = "Glm4MoeForCausalLM"
): ):
self.num_fused_shared_experts = 0 self.num_fused_shared_experts = 0
if global_server_args_dict["disable_shared_experts_fusion"]: if global_server_args_dict["disable_shared_experts_fusion"]:
...@@ -797,7 +797,6 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM): ...@@ -797,7 +797,6 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
not _is_cuda not _is_cuda
or torch.cuda.get_device_capability("cuda") < (8, 0) or torch.cuda.get_device_capability("cuda") < (8, 0)
or self.config.architectures[0] != architecture or self.config.architectures[0] != architecture
or self.config.n_routed_experts != 128
or self.config.n_shared_experts != 1 or self.config.n_shared_experts != 1
): ):
disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization." disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization."
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment