Unverified commit a06912ad, authored by Li Hui and committed by GitHub
Browse files

Fix judgment condition for enabling Deepseek V3/R1 shared expert fusion optimization (#7371)

parent 97011abc
...@@ -1731,12 +1731,12 @@ class DeepseekV2ForCausalLM(nn.Module): ...@@ -1731,12 +1731,12 @@ class DeepseekV2ForCausalLM(nn.Module):
disable_reason = None disable_reason = None
if ( if (
not _is_cuda not _is_cuda
or torch.cuda.get_device_capability("cuda") < (9, 0) or torch.cuda.get_device_capability("cuda") < (8, 0)
or self.config.architectures[0] != architecture or self.config.architectures[0] != architecture
or self.config.n_routed_experts != 256 or self.config.n_routed_experts != 256
or self.config.n_shared_experts != 1 or self.config.n_shared_experts != 1
): ):
disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 90 can use shared experts fusion optimization." disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
elif ( elif (
global_server_args_dict["enable_deepep_moe"] global_server_args_dict["enable_deepep_moe"]
or global_server_args_dict["enable_ep_moe"] or global_server_args_dict["enable_ep_moe"]
...@@ -2040,7 +2040,7 @@ class DeepseekV2ForCausalLM(nn.Module): ...@@ -2040,7 +2040,7 @@ class DeepseekV2ForCausalLM(nn.Module):
if self.num_fused_shared_experts > 0: if self.num_fused_shared_experts > 0:
assert self.num_fused_shared_experts == 1 assert self.num_fused_shared_experts == 1
logger.info("Shared experts fusion optimization enabled.") log_info_on_rank0(logger, "Shared experts fusion optimization enabled.")
params_dict = dict(self.named_parameters()) params_dict = dict(self.named_parameters())
weight_names = [] weight_names = []
......
Markdown is supported
0% — try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment